Dominik Macháček 2024-02-07
Fixed: timestamps now keep increasing across restarts of the online processor (the time offset is preserved).
However, the code still needs to be simplified and cleaned up before merging.
@175c382b7b1663fdee6316f039426d4c93c27e44
whisper_online.py
--- whisper_online.py
+++ whisper_online.py
@@ -235,16 +235,19 @@
 
         self.buffer_trimming_way, self.buffer_trimming_sec = buffer_trimming
 
-    def init(self):
+    def init(self, keep_offset=False):
         """run this when starting or restarting processing"""
         self.audio_buffer = np.array([],dtype=np.float32)
-        self.buffer_time_offset = 0
-
         self.transcript_buffer = HypothesisBuffer(logfile=self.logfile)
+        if not keep_offset:
+            self.buffer_time_offset = 0
+            self.transcript_buffer.last_commited_time = 0
+        else:
+            self.transcript_buffer.last_commited_time = self.buffer_time_offset
+
         self.commited = []
         self.last_chunked_at = 0
 
-        self.silence_iters = 0
 
     def insert_audio_chunk(self, audio):
         self.audio_buffer = np.append(self.audio_buffer, audio)
@@ -400,6 +403,7 @@
         o = self.transcript_buffer.complete()
         f = self.to_flush(o)
         print("last, noncommited:",f,file=self.logfile)
+        self.buffer_time_offset += len(self.audio_buffer)/16000
         return f
 
 
whisper_online_vac.py
--- whisper_online_vac.py
+++ whisper_online_vac.py
@@ -45,7 +45,7 @@
 
     def finish(self):
         ret = self.online.finish()
-        self.online.init()
+        self.online.init(keep_offset=True)
         self.current_online_chunk_buffer_size = 0
         return ret
 
Add a comment
List