

fix #7
@11c47567cf48af1e26d7ab1a6b2fb95996992c38
--- whisper_online.py
+++ whisper_online.py
... | ... | @@ -459,16 +459,20 @@ |
459 | 459 |
SAMPLING_RATE = 16000 |
460 | 460 |
|
461 | 461 |
def __init__( |
462 |
- self, asr, tokenizer=None, buffer_trimming=("segment", 15), logfile=sys.stderr |
|
462 |
+ self, |
|
463 |
+ asr, |
|
464 |
+ tokenize_method=None, |
|
465 |
+ buffer_trimming=("segment", 15), |
|
466 |
+ logfile=sys.stderr, |
|
463 | 467 |
): |
464 | 468 |
"""asr: WhisperASR object |
465 |
- tokenizer: sentence tokenizer object for the target language. Must have a method *split* that behaves like the one of MosesTokenizer. It can be None, if "segment" buffer trimming option is used, then tokenizer is not used at all. |
|
469 |
+ tokenize_method: sentence tokenizer function for the target language. Must be a callable that behaves like the *split* method of MosesTokenizer. It can be None if the "segment" buffer trimming option is used; in that case the tokenizer is not used at all. |
|
466 | 470 |
("segment", 15) |
467 | 471 |
buffer_trimming: a pair of (option, seconds), where option is either "sentence" or "segment", and seconds is a number. Buffer is trimmed if it is longer than "seconds" threshold. Default is the most recommended option. |
468 | 472 |
logfile: where to store the log. |
469 | 473 |
""" |
470 | 474 |
self.asr = asr |
471 |
- self.tokenizer = tokenizer |
|
475 |
+ self.tokenize = tokenize_method |
|
472 | 476 |
self.logfile = logfile |
473 | 477 |
|
474 | 478 |
self.init() |
... | ... | @@ -612,13 +616,13 @@ |
612 | 616 |
self.buffer_time_offset = time |
613 | 617 |
|
614 | 618 |
def words_to_sentences(self, words): |
615 |
- """Uses self.tokenizer for sentence segmentation of words. |
|
619 |
+ """Uses self.tokenize for sentence segmentation of words. |
|
616 | 620 |
Returns: [(beg,end,"sentence 1"),...] |
617 | 621 |
""" |
618 | 622 |
|
619 | 623 |
cwords = [w for w in words] |
620 | 624 |
t = " ".join(o[2] for o in cwords) |
621 |
- s = self.tokenizer.split(t) |
|
625 |
+ s = self.tokenize(t) |
|
622 | 626 |
out = [] |
623 | 627 |
while s: |
624 | 628 |
beg = None |
Add a comment
Delete comment
Once you delete this comment, you won't be able to recover it. Are you sure you want to delete this comment?