• Y
  • List All
  • Feedback
    • This Project
    • All Projects
Profile Account settings Log out
  • Favorite
  • Project
  • All
Loading...
  • Log in
  • Sign up
yjyoon / whisper_server_speaches star
  • Project homeH
  • CodeC
  • IssueI
  • Pull requestP
  • Review R
  • MilestoneM
  • BoardB
  • Files
  • Commit
  • Branches
whisper_server_speaches/src/speaches/text_utils_test.py
Download as .zip file
File name
Commit message
Commit date
.github/workflows
feat: switch to ghcr.io
01-10
configuration
feat: add instrumentation
2024-12-17
docs
rename to `speaches`
01-12
examples
rename to `speaches`
01-12
scripts
chore: misc changes
2024-10-03
src/speaches
rename to `speaches`
01-12
tests
rename to `speaches`
01-12
.dockerignore
chore: update .dockerignore
2024-11-01
.envrc
init
2024-05-20
.gitattributes
chore(deps): update pre-commit hook astral-sh/ruff-pre-commit to v0.7.2
2024-11-02
.gitignore
chore: update .gitignore
2024-07-03
.pre-commit-config.yaml
docs: usage pages (and more)
01-12
Dockerfile
chore(deps): update ghcr.io/astral-sh/uv docker tag to v0.5.18
01-12
LICENSE
init
2024-05-20
README.md
rename to `speaches`
01-12
Taskfile.yaml
rename to `speaches`
01-12
audio.wav
chore: update volume names and mount points
01-10
compose.cpu.yaml
rename to `speaches`
01-12
compose.cuda-cdi.yaml
rename to `speaches`
01-12
compose.cuda.yaml
rename to `speaches`
01-12
compose.observability.yaml
rename to `speaches`
01-12
compose.yaml
rename to `speaches`
01-12
flake.lock
deps: update flake
2024-11-01
flake.nix
chore(deps): add loki and tempo package to flake
2024-12-17
mkdocs.yml
rename to `speaches`
01-12
pyproject.toml
rename to `speaches`
01-12
renovate.json
feat: renovate handle pre-commit
2024-11-01
uv.lock
rename to `speaches`
01-12
File name
Commit message
Commit date
routers
rename to `speaches`
01-12
__init__.py
rename to `speaches`
01-12
api_models.py
rename to `speaches`
01-12
asr.py
rename to `speaches`
01-12
audio.py
rename to `speaches`
01-12
config.py
rename to `speaches`
01-12
dependencies.py
rename to `speaches`
01-12
gradio_app.py
rename to `speaches`
01-12
hf_utils.py
rename to `speaches`
01-12
logger.py
rename to `speaches`
01-12
main.py
rename to `speaches`
01-12
model_manager.py
rename to `speaches`
01-12
text_utils.py
rename to `speaches`
01-12
text_utils_test.py
rename to `speaches`
01-12
transcriber.py
rename to `speaches`
01-12
Fedir Zadniprovskyi 01-12 72b312c rename to `speaches` UNIX
Raw Open in browser Change history
"""Tests for the text helpers exposed by ``speaches.text_utils``."""

from speaches.api_models import TranscriptionWord
from speaches.text_utils import (
    canonicalize_word,
    common_prefix,
    is_eos,
    srt_format_timestamp,
    to_full_sentences,
    vtt_format_timestamp,
)


def _word(text: str) -> TranscriptionWord:
    """Build a ``TranscriptionWord`` for *text* with zeroed timing and probability."""
    return TranscriptionWord(word=text, start=0.0, end=0.0, probability=0.0)


def _words(*texts: str) -> list[TranscriptionWord]:
    """Build a fresh list of words, one per entry in *texts*."""
    return [_word(text) for text in texts]


def test_is_eos() -> None:
    """Only text ending in a single ``.``, ``!`` or ``?`` counts as end-of-sentence."""
    cases = [
        ("Hello", False),
        ("Hello...", False),  # a trailing ellipsis is not a sentence end
        ("Hello.", True),
        ("Hello!", True),
        ("Hello?", True),
        ("Hello. Yo", False),  # only the tail of the text matters
        ("Hello. Yo...", False),
        ("Hello. Yo.", True),
    ]
    for text, expected in cases:
        assert bool(is_eos(text)) is expected


# NOTE(review): the name starts with "tests_" rather than "test_" like its
# siblings; it is kept unchanged here so existing test selections keep working.
def tests_to_full_sentences() -> None:
    """Words are grouped into completed sentences; an unfinished tail is dropped."""
    # Each case: (input word texts, expected sentences as lists of word texts).
    cases = [
        ([], []),
        (["Hello"], []),
        (["Hello...", " world"], []),
        (["Hello...", " world."], [["Hello...", " world."]]),
        (["Hello...", " world.", " How"], [["Hello...", " world."]]),
    ]
    for texts, sentence_texts in cases:
        # NOTE(review): relies on TranscriptionWord comparing by value — confirm.
        expected = [_words(*sentence) for sentence in sentence_texts]
        assert to_full_sentences(_words(*texts)) == expected


def test_srt_format_timestamp() -> None:
    """SRT timestamps render as ``HH:MM:SS,mmm`` (comma before the milliseconds)."""
    cases = [
        (0.0, "00:00:00,000"),
        (1.0, "00:00:01,000"),
        (1.234, "00:00:01,234"),
        (60.0, "00:01:00,000"),
        (61.0, "00:01:01,000"),
        (61.234, "00:01:01,234"),
        (3600.0, "01:00:00,000"),
        (3601.0, "01:00:01,000"),
        (3601.234, "01:00:01,234"),
        (23423.4234, "06:30:23,423"),
    ]
    for seconds, expected in cases:
        assert srt_format_timestamp(seconds) == expected


def test_vtt_format_timestamp() -> None:
    """VTT timestamps render as ``HH:MM:SS.mmm`` (dot before the milliseconds)."""
    cases = [
        (0.0, "00:00:00.000"),
        (1.0, "00:00:01.000"),
        (1.234, "00:00:01.234"),
        (60.0, "00:01:00.000"),
        (61.0, "00:01:01.000"),
        (61.234, "00:01:01.234"),
        (3600.0, "01:00:00.000"),
        (3601.0, "01:00:01.000"),
        (3601.234, "01:00:01.234"),
        (23423.4234, "06:30:23.423"),
    ]
    for seconds, expected in cases:
        assert vtt_format_timestamp(seconds) == expected


def test_canonicalize_word() -> None:
    """Canonical form is lower-case with surrounding punctuation and spaces removed."""
    for raw in ("ABC", "...ABC?", "... AbC ..."):
        assert canonicalize_word(raw) == "abc"


def test_common_prefix() -> None:
    """``common_prefix`` returns the leading run of words shared by both lists."""
    # Each case: (texts of a, texts of b, texts of the expected prefix).
    cases = [
        (["a", "b", "c"], ["a", "b", "c"], ["a", "b", "c"]),
        (["a", "b", "c"], ["a", "b", "d"], ["a", "b"]),
        (["a", "b", "c"], ["a"], ["a"]),
        (["a"], ["a", "b", "c"], ["a"]),
        (["a"], [], []),
        ([], ["a"], []),
        # A shared suffix is not a prefix.
        (["a", "b", "c"], ["b", "c"], []),
    ]
    for a_texts, b_texts, prefix_texts in cases:
        a = _words(*a_texts)
        b = _words(*b_texts)
        assert common_prefix(a, b) == _words(*prefix_texts)


def test_common_prefix_and_canonicalization() -> None:
    """Words match on canonical form; the result carries the words from ``a``."""
    cases = [
        (["A..."], ["a?", "b", "c"], ["A..."]),
        (["A...", "B?", "C,"], ["a??", " b", " ,c"], ["A...", "B?", "C,"]),
    ]
    for a_texts, b_texts, prefix_texts in cases:
        a = _words(*a_texts)
        b = _words(*b_texts)
        assert common_prefix(a, b) == _words(*prefix_texts)

          
        
    
    
Copyright Yona authors & © NAVER Corp. & NAVER LABS Supported by NAVER CLOUD PLATFORM

or
Sign in with github login with Google Sign in with Google
Reset password | Sign up