

Interpolate word timestamps based on word character length
@46598adf1cac165204e66fec7869b18e3c70a22b
--- whisper_online.py
+++ whisper_online.py
... | ... | @@ -190,12 +190,14 @@ |
190 | 190 |
|
191 | 191 |
# Assign start and end times for each word |
192 | 192 |
# We only have timestamps per segment, so interpolating start and end-times |
193 |
- # assuming equal duration per word |
|
193 |
+ # in proportion to each word's character count |
|
194 |
+ |
|
194 | 195 |
segment_duration = segment["end"] - segment["start"] |
195 |
- duration_per_word = segment_duration / len(words) |
|
196 |
+ total_characters = sum(len(word) for word in words) |
|
197 |
+ duration_per_character = segment_duration / total_characters |
|
196 | 198 |
start_time = segment["start"] |
197 | 199 |
for word in words: |
198 |
- end_time = start_time + duration_per_word |
|
200 |
+ end_time = start_time + duration_per_character * len(word) |
|
199 | 201 |
o.append((start_time, end_time, word)) |
200 | 202 |
start_time = end_time |
201 | 203 |
|
Add a comment
Delete comment
Once you delete this comment, you won't be able to recover it. Are you sure you want to delete this comment?