

Construct an explicit logger rather than using the root logger
commit ebdde208f3efb1232f7c5ab2421944d5c3e4ec54
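The change applies the standard library idiom for logging in library code: each file constructs one module-level logger named after itself with logging.getLogger(__name__), and every log call goes through that logger instead of through the root logger. Records then carry the originating module's name, so an application can filter, re-route, or silence each module independently, and the library never has to touch global logging configuration. A minimal standalone sketch of the idiom (the function and message are illustrative, not part of this diff):

    import logging

    # Named module-level logger: records it emits are tagged with this
    # module's __name__, e.g. "whisper_online" when imported as a module.
    logger = logging.getLogger(__name__)

    def transcribe_chunk():
        # Silent unless the application installs a handler and enables DEBUG
        # for this logger or for an ancestor such as the root logger.
        logger.debug("processing one audio chunk")

    if __name__ == "__main__":
        # The application, not the library, chooses destination and format.
        logging.basicConfig(level=logging.DEBUG,
                            format="%(name)s %(levelname)s: %(message)s")
        transcribe_chunk()

One caveat: when a file is executed directly rather than imported, __name__ is "__main__", so the logger created here is named __main__ rather than after the file.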
--- whisper_online.py
+++ whisper_online.py
@@ -11,6 +11,8 @@
 import soundfile as sf
 import math
 
+logger = logging.getLogger(__name__)
+
 @lru_cache
 def load_audio(fname):
     a, _ = librosa.load(fname, sr=16000, dtype=np.float32)
@@ -65,7 +67,7 @@
         from whisper_timestamped import transcribe_timestamped
         self.transcribe_timestamped = transcribe_timestamped
         if model_dir is not None:
-            logging.debug("ignoring model_dir, not implemented")
+            logger.debug("ignoring model_dir, not implemented")
         return whisper.load_model(modelsize, download_root=cache_dir)
 
     def transcribe(self, audio, init_prompt=""):
@@ -106,7 +108,7 @@
         from faster_whisper import WhisperModel
         logging.getLogger("faster_whisper").setLevel(logging.WARNING)
         if model_dir is not None:
-            logging.debug(f"Loading whisper model from model_dir {model_dir}. modelsize and cache_dir parameters are not used.")
+            logger.debug(f"Loading whisper model from model_dir {model_dir}. modelsize and cache_dir parameters are not used.")
             model_size_or_path = model_dir
         elif modelsize is not None:
             model_size_or_path = modelsize
@@ -229,7 +231,7 @@
 
         # Process transcription/translation
         transcript = proc.create(**params)
-        logging.debug(f"OpenAI API processed accumulated {self.transcribed_seconds} seconds")
+        logger.debug(f"OpenAI API processed accumulated {self.transcribed_seconds} seconds")
 
         return transcript
 
@@ -276,7 +278,7 @@
                             for j in range(i):
                                 words.append(repr(self.new.pop(0)))
                             words_msg = "\t".join(words)
-                            logging.debug(f"removing last {i} words: {words_msg}")
+                            logger.debug(f"removing last {i} words: {words_msg}")
                             break
 
     def flush(self):
@@ -365,9 +367,9 @@
         """
 
         prompt, non_prompt = self.prompt()
-        logging.debug(f"PROMPT: {prompt}")
-        logging.debug(f"CONTEXT: {non_prompt}")
-        logging.debug(f"transcribing {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f} seconds from {self.buffer_time_offset:2.2f}")
+        logger.debug(f"PROMPT: {prompt}")
+        logger.debug(f"CONTEXT: {non_prompt}")
+        logger.debug(f"transcribing {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f} seconds from {self.buffer_time_offset:2.2f}")
         res = self.asr.transcribe(self.audio_buffer, init_prompt=prompt)
 
         # transform to [(beg,end,"word1"), ...]
@@ -377,9 +379,9 @@
         o = self.transcript_buffer.flush()
         self.commited.extend(o)
         completed = self.to_flush(o)
-        logging.debug(f">>>>COMPLETE NOW: {completed}")
+        logger.debug(f">>>>COMPLETE NOW: {completed}")
         the_rest = self.to_flush(self.transcript_buffer.complete())
-        logging.debug(f"INCOMPLETE: {the_rest}")
+        logger.debug(f"INCOMPLETE: {the_rest}")
 
         # there is a newly confirmed text
 
@@ -403,18 +405,18 @@
                 #while k>0 and self.commited[k][1] > l:
                 #    k -= 1
                 #t = self.commited[k][1]
-                logging.debug(f"chunking segment")
+                logger.debug(f"chunking segment")
                 #self.chunk_at(t)
 
-        logging.debug(f"len of buffer now: {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f}")
+        logger.debug(f"len of buffer now: {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f}")
         return self.to_flush(o)
 
     def chunk_completed_sentence(self):
         if self.commited == []: return
-        logging.debug(self.commited)
+        logger.debug(self.commited)
         sents = self.words_to_sentences(self.commited)
         for s in sents:
-            logging.debug(f"\t\tSENT: {s}")
+            logger.debug(f"\t\tSENT: {s}")
         if len(sents) < 2:
             return
         while len(sents) > 2:
@@ -422,7 +424,7 @@
         # we will continue with audio processing at this timestamp
         chunk_at = sents[-2][1]
 
-        logging.debug(f"--- sentence chunked at {chunk_at:2.2f}")
+        logger.debug(f"--- sentence chunked at {chunk_at:2.2f}")
         self.chunk_at(chunk_at)
 
     def chunk_completed_segment(self, res):
@@ -439,12 +441,12 @@
                 ends.pop(-1)
                 e = ends[-2]+self.buffer_time_offset
             if e <= t:
-                logging.debug(f"--- segment chunked at {e:2.2f}")
+                logger.debug(f"--- segment chunked at {e:2.2f}")
                 self.chunk_at(e)
             else:
-                logging.debug(f"--- last segment not within commited area")
+                logger.debug(f"--- last segment not within commited area")
         else:
-            logging.debug(f"--- not enough segments to chunk")
+            logger.debug(f"--- not enough segments to chunk")
 
 
 
@@ -490,7 +492,7 @@
         """
         o = self.transcript_buffer.complete()
         f = self.to_flush(o)
-        logging.debug("last, noncommited: {f}")
+        logger.debug(f"last, noncommited: {f}")
         return f
 
 
@@ -530,7 +532,7 @@
 
     # the following languages are in Whisper, but not in wtpsplit:
     if lan in "as ba bo br bs fo haw hr ht jw lb ln lo mi nn oc sa sd sn so su sw tk tl tt".split():
-        logging.debug(f"{lan} code is not supported by wtpsplit. Going to use None lang_code option.")
+        logger.debug(f"{lan} code is not supported by wtpsplit. Going to use None lang_code option.")
         lan = None
 
     from wtpsplit import WtP
@@ -563,7 +565,7 @@
     """
     backend = args.backend
     if backend == "openai-api":
-        logging.debug("Using OpenAI API.")
+        logger.debug("Using OpenAI API.")
         asr = OpenaiApiASR(lan=args.lan)
     else:
         if backend == "faster-whisper":
@@ -574,14 +576,14 @@
         # Only for FasterWhisperASR and WhisperTimestampedASR
         size = args.model
         t = time.time()
-        logging.debug(f"Loading Whisper {size} model for {args.lan}...")
+        logger.debug(f"Loading Whisper {size} model for {args.lan}...")
         asr = asr_cls(modelsize=size, lan=args.lan, cache_dir=args.model_cache_dir, model_dir=args.model_dir)
         e = time.time()
-        logging.debug(f"done. It took {round(e-t,2)} seconds.")
+        logger.debug(f"done. It took {round(e-t,2)} seconds.")
 
     # Apply common configurations
     if getattr(args, 'vad', False):  # Checks if VAD argument is present and True
-        logging.info("Setting VAD filter")
+        logger.info("Setting VAD filter")
         asr.use_vad()
 
     language = args.lan
@@ -619,14 +621,14 @@
     logfile = sys.stderr
 
     if args.offline and args.comp_unaware:
-        logging.error("No or one option from --offline and --comp_unaware are available, not both. Exiting.")
+        logger.error("No or one option from --offline and --comp_unaware are available, not both. Exiting.")
         sys.exit(1)
 
     audio_path = args.audio_path
 
     SAMPLING_RATE = 16000
     duration = len(load_audio(audio_path))/SAMPLING_RATE
-    logging.info("Audio duration is: %2.2f seconds" % duration)
+    logger.info("Audio duration is: %2.2f seconds" % duration)
 
     asr, online = asr_factory(args, logfile=logfile)
     min_chunk = args.min_chunk_size
@@ -674,12 +676,12 @@
             try:
                 o = online.process_iter()
             except AssertionError as e:
-                logging.error(f"assertion error: {repr(e)}")
+                logger.error(f"assertion error: {repr(e)}")
                 pass
             else:
                 output_transcript(o, now=end)
 
-            logging.debug(f"## last processed {end:.2f}s")
+            logger.debug(f"## last processed {end:.2f}s")
 
             if end >= duration:
                 break
@@ -706,12 +708,12 @@
             try:
                 o = online.process_iter()
             except AssertionError as e:
-                logging.error(f"assertion error: {e}")
+                logger.error(f"assertion error: {e}")
                 pass
             else:
                 output_transcript(o)
             now = time.time() - start
-            logging.debug(f"## last processed {end:.2f} s, now is {now:.2f}, the latency is {now-end:.2f}")
+            logger.debug(f"## last processed {end:.2f} s, now is {now:.2f}, the latency is {now-end:.2f}")
 
             if end >= duration:
                 break
--- whisper_online_server.py
+++ whisper_online_server.py
@@ -7,6 +7,7 @@
 import logging
 import numpy as np
 
+logger = logging.getLogger(__name__)
 parser = argparse.ArgumentParser()
 
 # server options
@@ -37,13 +38,6 @@
 language = args.lan
 asr, online = asr_factory(args)
 min_chunk = args.min_chunk_size
-
-
-if args.buffer_trimming == "sentence":
-    tokenizer = create_tokenizer(tgt_language)
-else:
-    tokenizer = None
-online = OnlineASRProcessor(asr,tokenizer,buffer_trimming=(args.buffer_trimming, args.buffer_trimming_sec))
 
 # warm up the ASR because the very first transcribe takes more time than the others.
 # Test results in https://github.com/ufal/whisper_streaming/pull/81
@@ -161,7 +155,7 @@
             try:
                 self.send_result(o)
             except BrokenPipeError:
-                logging.info("broken pipe -- connection closed?")
+                logger.info("broken pipe -- connection closed?")
                 break
 
         # o = online.finish()  # this should be working
@@ -175,13 +169,13 @@
 s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
 s.bind((args.host, args.port))
 s.listen(1)
-logging.info('Listening on'+str((args.host, args.port)))
+logger.info('Listening on'+str((args.host, args.port)))
 while True:
     conn, addr = s.accept()
-    logging.info('Connected to client on {}'.format(addr))
+    logger.info('Connected to client on {}'.format(addr))
     connection = Connection(conn)
     proc = ServerProcessor(connection, online, min_chunk)
     proc.process()
     conn.close()
-    logging.info('Connection to client closed')
-logging.info('Connection closed, terminating.')
+    logger.info('Connection to client closed')
+logger.info('Connection closed, terminating.')
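With named loggers in place throughout both files, a consuming application can now tune each module's verbosity separately. A sketch of such a configuration (it assumes the files are imported under the module names whisper_online and whisper_online_server; when they run directly as scripts, their loggers are named __main__ instead):

    import logging

    # Root handler: INFO and above from everything, default format to stderr.
    logging.basicConfig(level=logging.INFO)
    # Verbose tracing from the streaming core only...
    logging.getLogger("whisper_online").setLevel(logging.DEBUG)
    # ...while the TCP server stays quiet except for warnings and errors.
    logging.getLogger("whisper_online_server").setLevel(logging.WARNING)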