

Further tidying of print output, so by default there's little on the console
@32191b5c6c670873b93b2deced3d7d2390b45139
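
Note: with print(..., file=stderr) replaced by calls to the standard logging module, the diagnostics below are silent by default, because Python's root logger only emits WARNING and above until it is configured. A minimal sketch (not part of this commit) of how a caller of these scripts could turn the verbose output back on:

    import logging
    import sys

    # Opt back in to the DEBUG/INFO messages that used to be print()-ed to stderr.
    logging.basicConfig(
        level=logging.DEBUG,                # show the logging.debug()/info() calls added in this commit
        format="%(levelname)s\t%(message)s",
        stream=sys.stderr,                  # keep diagnostics off stdout, where the transcript goes
    )

    # Chatty third-party loggers can stay quiet, as whisper_online_server.py does for faster-whisper:
    logging.getLogger("faster_whisper").setLevel(logging.WARNING)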
--- whisper_online.py
+++ whisper_online.py
@@ -4,6 +4,7 @@
 import librosa
 from functools import lru_cache
 import time
+import logging
 
 
 
@@ -57,7 +58,7 @@
         from whisper_timestamped import transcribe_timestamped
         self.transcribe_timestamped = transcribe_timestamped
         if model_dir is not None:
-            print("ignoring model_dir, not implemented",file=self.logfile)
+            logging.debug("ignoring model_dir, not implemented")
         return whisper.load_model(modelsize, download_root=cache_dir)
 
     def transcribe(self, audio, init_prompt=""):
@@ -97,7 +98,7 @@
     def load_model(self, modelsize=None, cache_dir=None, model_dir=None):
         from faster_whisper import WhisperModel
         if model_dir is not None:
-            print(f"Loading whisper model from model_dir {model_dir}. modelsize and cache_dir parameters are not used.",file=self.logfile)
+            logging.debug(f"Loading whisper model from model_dir {model_dir}. modelsize and cache_dir parameters are not used.")
             model_size_or_path = model_dir
         elif modelsize is not None:
             model_size_or_path = modelsize
@@ -173,9 +174,11 @@
                         c = " ".join([self.commited_in_buffer[-j][2] for j in range(1,i+1)][::-1])
                         tail = " ".join(self.new[j-1][2] for j in range(1,i+1))
                         if c == tail:
-                            print("removing last",i,"words:",file=self.logfile)
+                            words = []
                             for j in range(i):
-                                print("\t",self.new.pop(0),file=self.logfile)
+                                words.append(repr(self.new.pop(0)))
+                            words_msg = "\t".join(words)
+                            logging.debug(f"removing last {i} words: {words_msg}")
                             break
 
     def flush(self):
@@ -267,9 +270,9 @@
         """
 
         prompt, non_prompt = self.prompt()
-        print("PROMPT:", prompt, file=self.logfile)
-        print("CONTEXT:", non_prompt, file=self.logfile)
-        print(f"transcribing {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f} seconds from {self.buffer_time_offset:2.2f}",file=self.logfile)
+        logging.debug(f"PROMPT: {prompt}")
+        logging.debug(f"CONTEXT: {non_prompt}")
+        logging.debug(f"transcribing {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f} seconds from {self.buffer_time_offset:2.2f}")
         res = self.asr.transcribe(self.audio_buffer, init_prompt=prompt)
 
         # transform to [(beg,end,"word1"), ...]
@@ -278,8 +281,10 @@
         self.transcript_buffer.insert(tsw, self.buffer_time_offset)
         o = self.transcript_buffer.flush()
         self.commited.extend(o)
-        print(">>>>COMPLETE NOW:",self.to_flush(o),file=self.logfile,flush=True)
-        print("INCOMPLETE:",self.to_flush(self.transcript_buffer.complete()),file=self.logfile,flush=True)
+        completed = self.to_flush(o)
+        logging.debug(f">>>>COMPLETE NOW: {completed}")
+        the_rest = self.to_flush(self.transcript_buffer.complete())
+        logging.debug(f"INCOMPLETE: {the_rest}")
 
         # there is a newly confirmed text
 
@@ -303,18 +308,18 @@
             #while k>0 and self.commited[k][1] > l:
             #    k -= 1
             #t = self.commited[k][1]
-            print(f"chunking segment",file=self.logfile)
+            logging.debug(f"chunking segment")
             #self.chunk_at(t)
 
-        print(f"len of buffer now: {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f}",file=self.logfile)
+        logging.debug(f"len of buffer now: {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f}")
         return self.to_flush(o)
 
     def chunk_completed_sentence(self):
         if self.commited == []: return
-        print(self.commited,file=self.logfile)
+        logging.debug(self.commited)
         sents = self.words_to_sentences(self.commited)
         for s in sents:
-            print("\t\tSENT:",s,file=self.logfile)
+            logging.debug(f"\t\tSENT: {s}")
         if len(sents) < 2:
             return
         while len(sents) > 2:
@@ -322,7 +327,7 @@
         # we will continue with audio processing at this timestamp
         chunk_at = sents[-2][1]
 
-        print(f"--- sentence chunked at {chunk_at:2.2f}",file=self.logfile)
+        logging.debug(f"--- sentence chunked at {chunk_at:2.2f}")
         self.chunk_at(chunk_at)
 
     def chunk_completed_segment(self, res):
@@ -339,12 +344,12 @@
                 ends.pop(-1)
                 e = ends[-2]+self.buffer_time_offset
             if e <= t:
-                print(f"--- segment chunked at {e:2.2f}",file=self.logfile)
+                logging.debug(f"--- segment chunked at {e:2.2f}")
                 self.chunk_at(e)
             else:
-                print(f"--- last segment not within commited area",file=self.logfile)
+                logging.debug(f"--- last segment not within commited area")
         else:
-            print(f"--- not enough segments to chunk",file=self.logfile)
+            logging.debug(f"--- not enough segments to chunk")
 
 
 
@@ -391,7 +396,7 @@
         """
         o = self.transcript_buffer.complete()
         f = self.to_flush(o)
-        print("last, noncommited:",f,file=self.logfile)
+        logging.debug(f"last, noncommited: {f}")
         return f
 
 
@@ -431,7 +436,7 @@
 
     # the following languages are in Whisper, but not in wtpsplit:
     if lan in "as ba bo br bs fo haw hr ht jw lb ln lo mi nn oc sa sd sn so su sw tk tl tt".split():
-        print(f"{lan} code is not supported by wtpsplit. Going to use None lang_code option.", file=sys.stderr)
+        logging.debug(f"{lan} code is not supported by wtpsplit. Going to use None lang_code option.")
         lan = None
 
     from wtpsplit import WtP
@@ -476,20 +481,20 @@
     logfile = sys.stderr
 
     if args.offline and args.comp_unaware:
-        print("No or one option from --offline and --comp_unaware are available, not both. Exiting.",file=logfile)
+        logging.error("No or one option from --offline and --comp_unaware are available, not both. Exiting.")
         sys.exit(1)
 
     audio_path = args.audio_path
 
     SAMPLING_RATE = 16000
     duration = len(load_audio(audio_path))/SAMPLING_RATE
-    print("Audio duration is: %2.2f seconds" % duration, file=logfile)
+    logging.info("Audio duration is: %2.2f seconds" % duration)
 
     size = args.model
     language = args.lan
 
     t = time.time()
-    print(f"Loading Whisper {size} model for {language}...",file=logfile,end=" ",flush=True)
+    logging.info(f"Loading Whisper {size} model for {language}...")
 
     if args.backend == "faster-whisper":
         asr_cls = FasterWhisperASR
@@ -506,10 +511,10 @@
 
 
     e = time.time()
-    print(f"done. It took {round(e-t,2)} seconds.",file=logfile)
+    logging.info(f"done. It took {round(e-t,2)} seconds.")
 
     if args.vad:
-        print("setting VAD filter",file=logfile)
+        logging.info("setting VAD filter")
         asr.use_vad()
 
 
@@ -543,16 +548,15 @@
             print("%1.4f %1.0f %1.0f %s" % (now*1000, o[0]*1000,o[1]*1000,o[2]),file=logfile,flush=True)
             print("%1.4f %1.0f %1.0f %s" % (now*1000, o[0]*1000,o[1]*1000,o[2]),flush=True)
         else:
-            print(o,file=logfile,flush=True)
+            print("here?", o,file=logfile,flush=True)
 
     if args.offline: ## offline mode processing (for testing/debugging)
         a = load_audio(audio_path)
         online.insert_audio_chunk(a)
         try:
            o = online.process_iter()
-        except AssertionError:
-            print("assertion error",file=logfile)
-            pass
+        except AssertionError as e:
+            logging.error(f"assertion error: {repr(e)}")
         else:
            output_transcript(o)
        now = None
@@ -563,13 +567,13 @@
             online.insert_audio_chunk(a)
             try:
                 o = online.process_iter()
-            except AssertionError:
-                print("assertion error",file=logfile)
+            except AssertionError as e:
+                logging.error(f"assertion error: {repr(e)}")
                 pass
             else:
                 output_transcript(o, now=end)
 
-            print(f"## last processed {end:.2f}s",file=logfile,flush=True)
+            logging.debug(f"## last processed {end:.2f}s")
 
             if end >= duration:
                 break
@@ -595,13 +599,13 @@
 
             try:
                 o = online.process_iter()
-            except AssertionError:
-                print("assertion error",file=logfile)
+            except AssertionError as e:
+                logging.error(f"assertion error: {e}")
                 pass
             else:
                 output_transcript(o)
             now = time.time() - start
-            print(f"## last processed {end:.2f} s, now is {now:.2f}, the latency is {now-end:.2f}",file=logfile,flush=True)
+            logging.debug(f"## last processed {end:.2f} s, now is {now:.2f}, the latency is {now-end:.2f}")
 
             if end >= duration:
                 break
--- whisper_online_server.py
+++ whisper_online_server.py
@@ -39,6 +39,7 @@
 if args.backend == "faster-whisper":
     from faster_whisper import WhisperModel
     asr_cls = FasterWhisperASR
+    logging.getLogger("faster_whisper").setLevel(logging.WARNING)
 else:
     import whisper
     import whisper_timestamped
@@ -80,7 +81,7 @@
     # warm up the ASR, because the very first transcribe takes much more time than the other
     asr.transcribe(a)
 else:
-    logging.info("Whisper is not warmed up")
+    logging.debug("Whisper is not warmed up")
 
 
 ######### Server objects
@@ -135,8 +136,6 @@
         out = []
         while sum(len(x) for x in out) < self.min_chunk*SAMPLING_RATE:
             raw_bytes = self.connection.non_blocking_receive_audio()
-            print(raw_bytes[:10])
-            print(len(raw_bytes))
             if not raw_bytes:
                 break
             sf = soundfile.SoundFile(io.BytesIO(raw_bytes), channels=1,endian="LITTLE",samplerate=SAMPLING_RATE, subtype="PCM_16",format="RAW")
@@ -167,7 +166,7 @@
             print("%1.0f %1.0f %s" % (beg,end,o[2]),flush=True,file=sys.stderr)
             return "%1.0f %1.0f %s" % (beg,end,o[2])
         else:
-            print(o,file=sys.stderr,flush=True)
+            # No text, so no output
             return None
 
     def send_result(self, o):
@@ -181,14 +180,13 @@
         while True:
             a = self.receive_audio_chunk()
             if a is None:
-                print("break here",file=sys.stderr)
                 break
             self.online_asr_proc.insert_audio_chunk(a)
             o = online.process_iter()
             try:
                 self.send_result(o)
             except BrokenPipeError:
-                print("broken pipe -- connection closed?",file=sys.stderr)
+                logging.info("broken pipe -- connection closed?")
                 break
 
         # o = online.finish() # this should be working