

remove mic test and streams
@74b80e376624506dc4625dc413458d2d1abaea4a
--- mic_test_whisper_simple.py
... | ... | @@ -1,95 +0,0 @@ |
1 | -from microphone_stream import MicrophoneStream | |
2 | -from voice_activity_controller import VoiceActivityController | |
3 | -from whisper_online import * | |
4 | -import numpy as np | |
5 | -import librosa | |
6 | -import io | |
7 | -import soundfile | |
8 | -import sys | |
9 | - | |
10 | - | |
11 | - | |
12 | - | |
13 | -class SimpleASRProcessor: | |
14 | - | |
15 | - def __init__(self, asr, sampling_rate = 16000): | |
16 | - """run this when starting or restarting processing""" | |
17 | - self.audio_buffer = np.array([],dtype=np.float32) | |
18 | - self.prompt_buffer = "" | |
19 | - self.asr = asr | |
20 | - self.sampling_rate = sampling_rate | |
21 | - self.init_prompt = '' | |
22 | - | |
23 | - def ts_words(self, segments): | |
24 | - result = "" | |
25 | - for segment in segments: | |
26 | - if segment.no_speech_prob > 0.9: | |
27 | - continue | |
28 | - for word in segment.words: | |
29 | - w = word.word | |
30 | - t = (word.start, word.end, w) | |
31 | - result +=w | |
32 | - return result | |
33 | - | |
34 | - def stream_process(self, vad_result): | |
35 | - iter_in_phrase = 0 | |
36 | - for chunk, is_final in vad_result: | |
37 | - iter_in_phrase += 1 | |
38 | - | |
39 | - if chunk is not None: | |
40 | - sf = soundfile.SoundFile(io.BytesIO(chunk), channels=1,endian="LITTLE",samplerate=SAMPLING_RATE, subtype="PCM_16",format="RAW") | |
41 | - audio, _ = librosa.load(sf,sr=SAMPLING_RATE) | |
42 | - out = [] | |
43 | - out.append(audio) | |
44 | - a = np.concatenate(out) | |
45 | - self.audio_buffer = np.append(self.audio_buffer, a) | |
46 | - | |
47 | - if is_final and len(self.audio_buffer) > 0: | |
48 | - res = self.asr.transcribe(self.audio_buffer, init_prompt=self.init_prompt) | |
49 | - tsw = self.ts_words(res) | |
50 | - | |
51 | - self.init_prompt = self.init_prompt + tsw | |
52 | - self.init_prompt = self.init_prompt [-100:] | |
53 | - self.audio_buffer.resize(0) | |
54 | - iter_in_phrase =0 | |
55 | - | |
56 | - yield True, tsw | |
57 | - # show progress every 50 chunks | |
58 | - elif iter_in_phrase % 50 == 0 and len(self.audio_buffer) > 0: | |
59 | - res = self.asr.transcribe(self.audio_buffer, init_prompt=self.init_prompt) | |
60 | - # use custom ts_words | |
61 | - tsw = self.ts_words(res) | |
62 | - yield False, tsw | |
63 | - | |
64 | - | |
65 | - | |
66 | - | |
67 | - | |
68 | - | |
69 | - | |
70 | -SAMPLING_RATE = 16000 | |
71 | - | |
72 | -model = "large-v2" | |
73 | -src_lan = "en" # source language | |
74 | -tgt_lan = "en" # target language -- same as source for ASR, "en" if translate task is used | |
75 | -use_vad = False | |
76 | -min_sample_length = 1 * SAMPLING_RATE | |
77 | - | |
78 | - | |
79 | - | |
80 | -vac = VoiceActivityController(use_vad_result = use_vad) | |
81 | -asr = FasterWhisperASR(src_lan, "large-v2") # loads and wraps Whisper model | |
82 | - | |
83 | -tokenizer = create_tokenizer(tgt_lan) | |
84 | -online = SimpleASRProcessor(asr) | |
85 | - | |
86 | - | |
87 | -stream = MicrophoneStream() | |
88 | -stream = vac.detect_user_speech(stream, audio_in_int16 = False) | |
89 | -stream = online.stream_process(stream) | |
90 | - | |
91 | -for isFinal, text in stream: | |
92 | - if isFinal: | |
93 | - print( text, end="\r\n") | |
94 | - else: | |
95 | - print( text, end="\r") |
--- mic_test_whisper_streaming.py
... | ... | @@ -1,71 +0,0 @@ |
1 | -from microphone_stream import MicrophoneStream | |
2 | -from voice_activity_controller import VoiceActivityController | |
3 | -from whisper_online import * | |
4 | -import numpy as np | |
5 | -import librosa | |
6 | -import io | |
7 | -import soundfile | |
8 | -import sys | |
9 | - | |
10 | - | |
11 | -SAMPLING_RATE = 16000 | |
12 | -model = "large-v2" | |
13 | -src_lan = "en" # source language | |
14 | -tgt_lan = "en" # target language -- same as source for ASR, "en" if translate task is used | |
15 | -use_vad_result = True | |
16 | -min_sample_length = 1 * SAMPLING_RATE | |
17 | - | |
18 | - | |
19 | - | |
20 | -asr = FasterWhisperASR(src_lan, model) # loads and wraps Whisper model | |
21 | -tokenizer = create_tokenizer(tgt_lan) # sentence segmenter for the target language | |
22 | -online = OnlineASRProcessor(asr, tokenizer) # create processing object | |
23 | - | |
24 | -microphone_stream = MicrophoneStream() | |
25 | -vad = VoiceActivityController(use_vad_result = use_vad_result) | |
26 | - | |
27 | -complete_text = '' | |
28 | -final_processing_pending = False | |
29 | -out = [] | |
30 | -out_len = 0 | |
31 | -for iter in vad.detect_user_speech(microphone_stream): # processing loop: | |
32 | - raw_bytes= iter[0] | |
33 | - is_final = iter[1] | |
34 | - | |
35 | - if raw_bytes: | |
36 | - sf = soundfile.SoundFile(io.BytesIO(raw_bytes), channels=1,endian="LITTLE",samplerate=SAMPLING_RATE, subtype="PCM_16",format="RAW") | |
37 | - audio, _ = librosa.load(sf,sr=SAMPLING_RATE) | |
38 | - out.append(audio) | |
39 | - out_len += len(audio) | |
40 | - | |
41 | - | |
42 | - if (is_final or out_len >= min_sample_length) and out_len>0: | |
43 | - a = np.concatenate(out) | |
44 | - online.insert_audio_chunk(a) | |
45 | - | |
46 | - if out_len > min_sample_length: | |
47 | - o = online.process_iter() | |
48 | - print('-----'*10) | |
49 | - complete_text = complete_text + o[2] | |
50 | - print('PARTIAL - '+ complete_text) # do something with current partial output | |
51 | - print('-----'*10) | |
52 | - out = [] | |
53 | - out_len = 0 | |
54 | - | |
55 | - if is_final: | |
56 | - o = online.finish() | |
57 | - # final_processing_pending = False | |
58 | - print('-----'*10) | |
59 | - complete_text = complete_text + o[2] | |
60 | - print('FINAL - '+ complete_text) # do something with current partial output | |
61 | - print('-----'*10) | |
62 | - online.init() | |
63 | - out = [] | |
64 | - out_len = 0 | |
65 | - | |
66 | - | |
67 | - | |
68 | - | |
69 | - | |
70 | - | |
71 | - |
--- microphone_stream.py
... | ... | @@ -1,82 +0,0 @@ |
1 | - | |
2 | - | |
3 | -### mic stream | |
4 | - | |
5 | -import queue | |
6 | -import re | |
7 | -import sys | |
8 | -import pyaudio | |
9 | - | |
10 | - | |
11 | -class MicrophoneStream: | |
12 | - def __init__( | |
13 | - self, | |
14 | - sample_rate: int = 16000, | |
15 | - ): | |
16 | - """ | |
17 | - Creates a stream of audio from the microphone. | |
18 | - | |
19 | - Args: | |
20 | - chunk_size: The size of each chunk of audio to read from the microphone. | |
21 | - channels: The number of channels to record audio from. | |
22 | - sample_rate: The sample rate to record audio at. | |
23 | - """ | |
24 | - try: | |
25 | - import pyaudio | |
26 | - except ImportError: | |
27 | - raise Exception('py audio not installed') | |
28 | - | |
29 | - self._pyaudio = pyaudio.PyAudio() | |
30 | - self.sample_rate = sample_rate | |
31 | - | |
32 | - self._chunk_size = int(self.sample_rate * 40 / 1000) | |
33 | - self._stream = self._pyaudio.open( | |
34 | - format=pyaudio.paInt16, | |
35 | - channels=1, | |
36 | - rate=sample_rate, | |
37 | - input=True, | |
38 | - frames_per_buffer=self._chunk_size, | |
39 | - ) | |
40 | - | |
41 | - self._open = True | |
42 | - | |
43 | - def __iter__(self): | |
44 | - """ | |
45 | - Returns the iterator object. | |
46 | - """ | |
47 | - | |
48 | - return self | |
49 | - | |
50 | - def __next__(self): | |
51 | - """ | |
52 | - Reads a chunk of audio from the microphone. | |
53 | - """ | |
54 | - if not self._open: | |
55 | - raise StopIteration | |
56 | - | |
57 | - try: | |
58 | - return self._stream.read(self._chunk_size) | |
59 | - except KeyboardInterrupt: | |
60 | - raise StopIteration | |
61 | - | |
62 | - def close(self): | |
63 | - """ | |
64 | - Closes the stream. | |
65 | - """ | |
66 | - | |
67 | - self._open = False | |
68 | - | |
69 | - if self._stream.is_active(): | |
70 | - self._stream.stop_stream() | |
71 | - | |
72 | - self._stream.close() | |
73 | - self._pyaudio.terminate() | |
74 | - | |
75 | - | |
76 | - | |
77 | - | |
78 | - | |
79 | - | |
80 | - | |
81 | - | |
82 | - |
Add a comment
Delete comment
Once you delete this comment, you won't be able to recover it. Are you sure you want to delete this comment?