Commit @9bf89543df52dbb2be03828a9099255a2339a5d3 - yjyoon/whisper_streaming

Dominik Machacek 2024-05-28

bugfix

@9bf89543df52dbb2be03828a9099255a2339a5d3

29cc175

9bf8954

whisper_online_server.py

--- whisper_online_server.py

+++ whisper_online_server.py

@@ -130,21 +130,28 @@
 
         self.last_end = None
 
+        self.is_first = True
+
     def receive_audio_chunk(self):
         # receive all audio that is available by this time
         # blocks operation if less than self.min_chunk seconds is available
         # unblocks if connection is closed or a chunk is available
         out = []
-        while sum(len(x) for x in out) < self.min_chunk*SAMPLING_RATE:
+        minlimit = self.min_chunk*SAMPLING_RATE
+        while sum(len(x) for x in out) < minlimit:
             raw_bytes = self.connection.non_blocking_receive_audio()
-            print("received audio:",len(raw_bytes), "bytes", raw_bytes[:10])
             if not raw_bytes:
                 break
+            print("received audio:",len(raw_bytes), "bytes", raw_bytes[:10])
             sf = soundfile.SoundFile(io.BytesIO(raw_bytes), channels=1,endian="LITTLE",samplerate=SAMPLING_RATE, subtype="PCM_16",format="RAW")
             audio, _ = librosa.load(sf,sr=SAMPLING_RATE)
             out.append(audio)
         if not out:
             return None
+        conc = np.concatenate(out)
+        if self.is_first and len(conc) < minlimit:
+            return None
+        self.is_first = False
         return np.concatenate(out)
 
     def format_output_transcript(self,o):

Add a comment

Open 0
Closed 0

List

...	...	@@ -130,21 +130,28 @@
130	130
131	131	self.last_end = None
132	132
	133	+ self.is_first = True
	134	+
133	135	def receive_audio_chunk(self):
134	136	# receive all audio that is available by this time
135	137	# blocks operation if less than self.min_chunk seconds is available
136	138	# unblocks if connection is closed or a chunk is available
137	139	out = []
138		- while sum(len(x) for x in out) < self.min_chunk*SAMPLING_RATE:
	140	+ minlimit = self.min_chunk*SAMPLING_RATE
	141	+ while sum(len(x) for x in out) < minlimit:
139	142	raw_bytes = self.connection.non_blocking_receive_audio()
140		- print("received audio:",len(raw_bytes), "bytes", raw_bytes[:10])
141	143	if not raw_bytes:
142	144	break
	145	+ print("received audio:",len(raw_bytes), "bytes", raw_bytes[:10])
143	146	sf = soundfile.SoundFile(io.BytesIO(raw_bytes), channels=1,endian="LITTLE",samplerate=SAMPLING_RATE, subtype="PCM_16",format="RAW")
144	147	audio, _ = librosa.load(sf,sr=SAMPLING_RATE)
145	148	out.append(audio)
146	149	if not out:
147	150	return None
	151	+ conc = np.concatenate(out)
	152	+ if self.is_first and len(conc) < minlimit:
	153	+ return None
	154	+ self.is_first = False
148	155	return np.concatenate(out)
149	156
150	157	def format_output_transcript(self,o):

Delete comment