

Move the backend import into the child class's load_model method and expose a logfile argument
@cc7e524fc4c5be39875538e4b944c2a846b72316
--- whisper_online.py
+++ whisper_online.py
... | ... | @@ -30,11 +30,7 @@ |
30 | 30 |
self.transcribe_kargs = {} |
31 | 31 |
self.original_language = lan |
32 | 32 |
|
33 |
- self.import_backend() |
|
34 | 33 |
self.model = self.load_model(modelsize, cache_dir, model_dir) |
35 |
- |
|
36 |
- def import_backend(self): |
|
37 |
- raise NotImplemented("must be implemented in the child class") |
|
38 | 34 |
|
39 | 35 |
def load_model(self, modelsize, cache_dir): |
40 | 36 |
raise NotImplemented("must be implemented in the child class") |
... | ... | @@ -52,15 +48,13 @@ |
52 | 48 |
""" |
53 | 49 |
|
54 | 50 |
sep = " " |
55 |
- |
|
56 |
- def import_backend(self): |
|
57 |
- global whisper, whisper_timestamped |
|
58 |
- import whisper |
|
59 |
- import whisper_timestamped |
|
60 | 51 |
|
61 | 52 |
def load_model(self, modelsize=None, cache_dir=None, model_dir=None): |
53 |
+ global whisper_timestamped # has to be global as it is used at each `transcribe` call |
|
54 |
+ import whisper |
|
55 |
+ import whisper_timestamped |
|
62 | 56 |
if model_dir is not None: |
63 |
- print("ignoring model_dir, not implemented",file=self.output) |
|
57 |
+ print("ignoring model_dir, not implemented",file=self.logfile) |
|
64 | 58 |
return whisper.load_model(modelsize, download_root=cache_dir) |
65 | 59 |
|
66 | 60 |
def transcribe(self, audio, init_prompt=""): |
... | ... | @@ -89,13 +83,10 @@ |
89 | 83 |
|
90 | 84 |
sep = "" |
91 | 85 |
|
92 |
- def import_backend(self): |
|
93 |
- global faster_whisper |
|
94 |
- import faster_whisper |
|
95 |
- |
|
96 | 86 |
def load_model(self, modelsize=None, cache_dir=None, model_dir=None): |
87 |
+ from faster_whisper import WhisperModel |
|
97 | 88 |
if model_dir is not None: |
98 |
- print(f"Loading whisper model from model_dir {model_dir}. modelsize and cache_dir parameters are not used.",file=self.output) |
|
89 |
+ print(f"Loading whisper model from model_dir {model_dir}. modelsize and cache_dir parameters are not used.",file=self.logfile) |
|
99 | 90 |
model_size_or_path = model_dir |
100 | 91 |
elif modelsize is not None: |
101 | 92 |
model_size_or_path = modelsize |
... | ... | @@ -143,7 +134,7 @@ |
143 | 134 |
|
144 | 135 |
class HypothesisBuffer: |
145 | 136 |
|
146 |
- def __init__(self, output=sys.stderr): |
|
137 |
+ def __init__(self, logfile=sys.stderr): |
|
147 | 138 |
"""output: where to store the log. Leave it unchanged to print to terminal.""" |
148 | 139 |
self.commited_in_buffer = [] |
149 | 140 |
self.buffer = [] |
... | ... | @@ -152,7 +143,7 @@ |
152 | 143 |
self.last_commited_time = 0 |
153 | 144 |
self.last_commited_word = None |
154 | 145 |
|
155 |
- self.output = output |
|
146 |
+ self.logfile = logfile |
|
156 | 147 |
|
157 | 148 |
def insert(self, new, offset): |
158 | 149 |
# compare self.commited_in_buffer and new. It inserts only the words in new that extend the commited_in_buffer, it means they are roughly behind last_commited_time and new in content |
... | ... | @@ -172,9 +163,9 @@ |
172 | 163 |
c = " ".join([self.commited_in_buffer[-j][2] for j in range(1,i+1)][::-1]) |
173 | 164 |
tail = " ".join(self.new[j-1][2] for j in range(1,i+1)) |
174 | 165 |
if c == tail: |
175 |
- print("removing last",i,"words:",file=self.output) |
|
166 |
+ print("removing last",i,"words:",file=self.logfile) |
|
176 | 167 |
for j in range(i): |
177 |
- print("\t",self.new.pop(0),file=self.output) |
|
168 |
+ print("\t",self.new.pop(0),file=self.logfile) |
|
178 | 169 |
break |
179 | 170 |
|
180 | 171 |
def flush(self): |
... | ... | @@ -211,14 +202,14 @@ |
211 | 202 |
|
212 | 203 |
SAMPLING_RATE = 16000 |
213 | 204 |
|
214 |
- def __init__(self, asr, tokenizer, output=sys.stderr): |
|
205 |
+ def __init__(self, asr, tokenizer, logfile=sys.stderr): |
|
215 | 206 |
"""asr: WhisperASR object |
216 | 207 |
tokenizer: sentence tokenizer object for the target language. Must have a method *split* that behaves like the one of MosesTokenizer. |
217 | 208 |
output: where to store the log. Leave it unchanged to print to terminal. |
218 | 209 |
""" |
219 | 210 |
self.asr = asr |
220 | 211 |
self.tokenizer = tokenizer |
221 |
- self.output = output |
|
212 |
+ self.logfile = logfile |
|
222 | 213 |
|
223 | 214 |
self.init() |
224 | 215 |
|
... | ... | @@ -227,7 +218,7 @@ |
227 | 218 |
self.audio_buffer = np.array([],dtype=np.float32) |
228 | 219 |
self.buffer_time_offset = 0 |
229 | 220 |
|
230 |
- self.transcript_buffer = HypothesisBuffer(output=self.output) |
|
221 |
+ self.transcript_buffer = HypothesisBuffer(logfile=self.logfile) |
|
231 | 222 |
self.commited = [] |
232 | 223 |
self.last_chunked_at = 0 |
233 | 224 |
|
... | ... | @@ -262,9 +253,9 @@ |
262 | 253 |
""" |
263 | 254 |
|
264 | 255 |
prompt, non_prompt = self.prompt() |
265 |
- print("PROMPT:", prompt, file=self.output) |
|
266 |
- print("CONTEXT:", non_prompt, file=self.output) |
|
267 |
- print(f"transcribing {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f} seconds from {self.buffer_time_offset:2.2f}",file=self.output) |
|
256 |
+ print("PROMPT:", prompt, file=self.logfile) |
|
257 |
+ print("CONTEXT:", non_prompt, file=self.logfile) |
|
258 |
+ print(f"transcribing {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f} seconds from {self.buffer_time_offset:2.2f}",file=self.logfile) |
|
268 | 259 |
res = self.asr.transcribe(self.audio_buffer, init_prompt=prompt) |
269 | 260 |
|
270 | 261 |
# transform to [(beg,end,"word1"), ...] |
... | ... | @@ -273,8 +264,8 @@ |
273 | 264 |
self.transcript_buffer.insert(tsw, self.buffer_time_offset) |
274 | 265 |
o = self.transcript_buffer.flush() |
275 | 266 |
self.commited.extend(o) |
276 |
- print(">>>>COMPLETE NOW:",self.to_flush(o),file=self.output,flush=True) |
|
277 |
- print("INCOMPLETE:",self.to_flush(self.transcript_buffer.complete()),file=self.output,flush=True) |
|
267 |
+ print(">>>>COMPLETE NOW:",self.to_flush(o),file=self.logfile,flush=True) |
|
268 |
+ print("INCOMPLETE:",self.to_flush(self.transcript_buffer.complete()),file=self.logfile,flush=True) |
|
278 | 269 |
|
279 | 270 |
# there is a newly confirmed text |
280 | 271 |
if o: |
... | ... | @@ -293,14 +284,14 @@ |
293 | 284 |
# elif self.transcript_buffer.complete(): |
294 | 285 |
# self.silence_iters = 0 |
295 | 286 |
# elif not self.transcript_buffer.complete(): |
296 |
-# # print("NOT COMPLETE:",to_flush(self.transcript_buffer.complete()),file=self.output,flush=True) |
|
287 |
+# # print("NOT COMPLETE:",to_flush(self.transcript_buffer.complete()),file=self.logfile,flush=True) |
|
297 | 288 |
# self.silence_iters += 1 |
298 | 289 |
# if self.silence_iters >= 3: |
299 | 290 |
# n = self.last_chunked_at |
300 | 291 |
## self.chunk_completed_sentence() |
301 | 292 |
## if n == self.last_chunked_at: |
302 | 293 |
# self.chunk_at(self.last_chunked_at+self.chunk) |
303 |
-# print(f"\tCHUNK: 3-times silence! chunk_at {n}+{self.chunk}",file=self.output) |
|
294 |
+# print(f"\tCHUNK: 3-times silence! chunk_at {n}+{self.chunk}",file=self.logfile) |
|
304 | 295 |
## self.silence_iters = 0 |
305 | 296 |
|
306 | 297 |
|
... | ... | @@ -316,18 +307,18 @@ |
316 | 307 |
#while k>0 and self.commited[k][1] > l: |
317 | 308 |
# k -= 1 |
318 | 309 |
#t = self.commited[k][1] |
319 |
- print(f"chunking because of len",file=self.output) |
|
310 |
+ print(f"chunking because of len",file=self.logfile) |
|
320 | 311 |
#self.chunk_at(t) |
321 | 312 |
|
322 |
- print(f"len of buffer now: {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f}",file=self.output) |
|
313 |
+ print(f"len of buffer now: {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f}",file=self.logfile) |
|
323 | 314 |
return self.to_flush(o) |
324 | 315 |
|
325 | 316 |
def chunk_completed_sentence(self): |
326 | 317 |
if self.commited == []: return |
327 |
- print(self.commited,file=self.output) |
|
318 |
+ print(self.commited,file=self.logfile) |
|
328 | 319 |
sents = self.words_to_sentences(self.commited) |
329 | 320 |
for s in sents: |
330 |
- print("\t\tSENT:",s,file=self.output) |
|
321 |
+ print("\t\tSENT:",s,file=self.logfile) |
|
331 | 322 |
if len(sents) < 2: |
332 | 323 |
return |
333 | 324 |
while len(sents) > 2: |
... | ... | @@ -335,7 +326,7 @@ |
335 | 326 |
# we will continue with audio processing at this timestamp |
336 | 327 |
chunk_at = sents[-2][1] |
337 | 328 |
|
338 |
- print(f"--- sentence chunked at {chunk_at:2.2f}",file=self.output) |
|
329 |
+ print(f"--- sentence chunked at {chunk_at:2.2f}",file=self.logfile) |
|
339 | 330 |
self.chunk_at(chunk_at) |
340 | 331 |
|
341 | 332 |
def chunk_completed_segment(self, res): |
... | ... | @@ -352,12 +343,12 @@ |
352 | 343 |
ends.pop(-1) |
353 | 344 |
e = ends[-2]+self.buffer_time_offset |
354 | 345 |
if e <= t: |
355 |
- print(f"--- segment chunked at {e:2.2f}",file=self.output) |
|
346 |
+ print(f"--- segment chunked at {e:2.2f}",file=self.logfile) |
|
356 | 347 |
self.chunk_at(e) |
357 | 348 |
else: |
358 |
- print(f"--- last segment not within commited area",file=self.output) |
|
349 |
+ print(f"--- last segment not within commited area",file=self.logfile) |
|
359 | 350 |
else: |
360 |
- print(f"--- not enough segments to chunk",file=self.output) |
|
351 |
+ print(f"--- not enough segments to chunk",file=self.logfile) |
|
361 | 352 |
|
362 | 353 |
|
363 | 354 |
|
... | ... | @@ -403,7 +394,7 @@ |
403 | 394 |
""" |
404 | 395 |
o = self.transcript_buffer.complete() |
405 | 396 |
f = self.to_flush(o) |
406 |
- print("last, noncommited:",f,file=self.output) |
|
397 |
+ print("last, noncommited:",f,file=self.logfile) |
|
407 | 398 |
return f |
408 | 399 |
|
409 | 400 |
|
Add a comment
Delete comment
Once you delete this comment, you won't be able to recover it. Are you sure you want to delete this comment?