Dominik Macháček 2024-08-19
remove mic test and streams
@74b80e376624506dc4625dc413458d2d1abaea4a
 
mic_test_whisper_simple.py (deleted)
--- mic_test_whisper_simple.py
@@ -1,95 +0,0 @@
-from microphone_stream import MicrophoneStream
-from voice_activity_controller import VoiceActivityController
-from whisper_online import *
-import numpy as np
-import librosa  
-import io
-import soundfile
-import sys
-
-
-
-
-class SimpleASRProcessor:
-
-    def __init__(self, asr, sampling_rate = 16000):
-        """run this when starting or restarting processing"""
-        self.audio_buffer = np.array([],dtype=np.float32)
-        self.prompt_buffer = ""
-        self.asr = asr
-        self.sampling_rate = sampling_rate
-        self.init_prompt = ''
-
-    def ts_words(self, segments):
-        result = ""
-        for segment in segments:
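-            # skip segments that Whisper marks as likely non-speech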
-            if segment.no_speech_prob > 0.9:
-                continue
-            for word in segment.words:
-                w = word.word
-                t = (word.start, word.end, w)
-                result +=w
-        return result 
-
-    def stream_process(self, vad_result):
-        iter_in_phrase = 0
-        for chunk, is_final in vad_result:
-            iter_in_phrase += 1
-
-            if chunk is not None:
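-                # decode raw 16-bit PCM bytes into a float32 waveform at SAMPLING_RATE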
-                sf = soundfile.SoundFile(io.BytesIO(chunk), channels=1,endian="LITTLE",samplerate=SAMPLING_RATE, subtype="PCM_16",format="RAW")
-                audio, _ = librosa.load(sf,sr=SAMPLING_RATE)
-                out = []
-                out.append(audio)
-                a = np.concatenate(out)
-                self.audio_buffer = np.append(self.audio_buffer, a)
-
-            if is_final and len(self.audio_buffer) > 0:
-                res = self.asr.transcribe(self.audio_buffer, init_prompt=self.init_prompt)
-                tsw = self.ts_words(res)
-                
-                self.init_prompt = self.init_prompt + tsw
-                self.init_prompt  = self.init_prompt [-100:]
-                self.audio_buffer.resize(0)
-                iter_in_phrase =0
-                
-                yield True, tsw
-            # show progress every 50 chunks
-            elif iter_in_phrase % 50 == 0 and len(self.audio_buffer) > 0:
-                res = self.asr.transcribe(self.audio_buffer, init_prompt=self.init_prompt)
-                # use custom ts_words
-                tsw = self.ts_words(res)
-                yield False, tsw
-            
-        
-
-
-
-
-
-SAMPLING_RATE = 16000
-
-model = "large-v2"
-src_lan = "en"  # source language
-tgt_lan = "en"  # target language  -- same as source for ASR, "en" if translate task is used
-use_vad = False
-min_sample_length = 1 * SAMPLING_RATE
-
-
-
-vac = VoiceActivityController(use_vad_result = use_vad)
-asr = FasterWhisperASR(src_lan, model)  # loads and wraps Whisper model
-
-tokenizer = create_tokenizer(tgt_lan)
-online = SimpleASRProcessor(asr)
-
-
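-# pipeline: microphone chunks -> VAD-segmented speech -> incremental transcription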
-stream = MicrophoneStream()
-stream = vac.detect_user_speech(stream, audio_in_int16 = False) 
-stream = online.stream_process(stream)
-
-for isFinal, text in stream:
-    if isFinal:
-        print( text,  end="\r\n")
-    else:
-        print( text,  end="\r")
 
mic_test_whisper_streaming.py (deleted)
--- mic_test_whisper_streaming.py
@@ -1,71 +0,0 @@
-from microphone_stream import MicrophoneStream
-from voice_activity_controller import VoiceActivityController
-from whisper_online import *
-import numpy as np
-import librosa  
-import io
-import soundfile
-import sys
-
-
-SAMPLING_RATE = 16000
-model = "large-v2"
-src_lan = "en"  # source language
-tgt_lan = "en"  # target language  -- same as source for ASR, "en" if translate task is used
-use_vad_result = True
-min_sample_length = 1 * SAMPLING_RATE
-
-
-
-asr = FasterWhisperASR(src_lan, model)  # loads and wraps Whisper model
-tokenizer = create_tokenizer(tgt_lan)  # sentence segmenter for the target language
-online = OnlineASRProcessor(asr, tokenizer)  # create processing object
-
-microphone_stream = MicrophoneStream() 
-vad = VoiceActivityController(use_vad_result = use_vad_result)
-
-complete_text = ''
-final_processing_pending = False
-out = []
-out_len = 0
-for iter in vad.detect_user_speech(microphone_stream):   # processing loop:
-    raw_bytes=  iter[0]
-    is_final =  iter[1]
-
-    if  raw_bytes:
-        sf = soundfile.SoundFile(io.BytesIO(raw_bytes), channels=1,endian="LITTLE",samplerate=SAMPLING_RATE, subtype="PCM_16",format="RAW")
-        audio, _ = librosa.load(sf,sr=SAMPLING_RATE)
-        out.append(audio)
-        out_len += len(audio)
-
-    
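-    # hand buffered audio to the online processor once the utterance ends or enough samples accumulated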
-    if (is_final or out_len >= min_sample_length) and out_len>0:
-        a = np.concatenate(out)
-        online.insert_audio_chunk(a)    
-        
-    if out_len > min_sample_length:
-        o = online.process_iter()
-        print('-----'*10)
-        complete_text = complete_text + o[2]
-        print('PARTIAL - '+ complete_text) # do something with current partial output
-        print('-----'*10)     
-        out = []
-        out_len = 0   
-
-    if is_final:
-        o = online.finish()
-        # final_processing_pending = False         
-        print('-----'*10)
-        complete_text = complete_text + o[2]
-        print('FINAL - '+ complete_text) # do something with the final output
-        print('-----'*10)   
-        online.init()   
-        out = []
-        out_len = 0    
-        
-
-
-
-
-
-
 
microphone_stream.py (deleted)
--- microphone_stream.py
@@ -1,82 +0,0 @@
-
-
-### mic stream
-
-import queue
-import re
-import sys
-import pyaudio
-
-
-class MicrophoneStream:
-    def __init__(
-        self,
-        sample_rate: int = 16000,
-    ):
-        """
-        Creates a stream of audio from the microphone.
-
-        Args:
-            sample_rate: The sample rate to record audio at.
-        """
-        try:
-            import pyaudio
-        except ImportError:
-            raise Exception('pyaudio is not installed')
-
-        self._pyaudio = pyaudio.PyAudio()
-        self.sample_rate = sample_rate
-
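-        # read 40 ms of audio per chunk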
-        self._chunk_size = int(self.sample_rate * 40  / 1000)
-        self._stream = self._pyaudio.open(
-            format=pyaudio.paInt16,
-            channels=1,
-            rate=sample_rate,
-            input=True,
-            frames_per_buffer=self._chunk_size,
-        )
-
-        self._open = True
-
-    def __iter__(self):
-        """
-        Returns the iterator object.
-        """
-
-        return self
-
-    def __next__(self):
-        """
-        Reads a chunk of audio from the microphone.
-        """
-        if not self._open:
-            raise StopIteration
-
-        try:
-            return self._stream.read(self._chunk_size)
-        except KeyboardInterrupt:
-            raise StopIteration
-
-    def close(self):
-        """
-        Closes the stream.
-        """
-
-        self._open = False
-
-        if self._stream.is_active():
-            self._stream.stop_stream()
-
-        self._stream.close()
-        self._pyaudio.terminate()
-
-
-
-
-
-
-
-
-