

rename to `speaches`
@43cc67a33b33b2114694e13cfba93f62438b8fa8
--- Dockerfile
+++ Dockerfile
@@ -1,7 +1,7 @@
 ARG BASE_IMAGE=nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04
 # hadolint ignore=DL3006
 FROM ${BASE_IMAGE}
-LABEL org.opencontainers.image.source="https://github.com/fedirz/faster-whisper-server"
+LABEL org.opencontainers.image.source="https://github.com/speaches-ai/speaches"
 LABEL org.opencontainers.image.licenses="MIT"
 # `ffmpeg` is installed because without it `gradio` won't work with mp3 (and possibly other) files
 # hadolint ignore=DL3008
@@ -15,7 +15,7 @@
 USER ubuntu
 ENV HOME=/home/ubuntu \
     PATH=/home/ubuntu/.local/bin:$PATH
-WORKDIR $HOME/faster-whisper-server
+WORKDIR $HOME/speaches
 # https://docs.astral.sh/uv/guides/integration/docker/#installing-uv
 COPY --chown=ubuntu --from=ghcr.io/astral-sh/uv:0.5.14 /uv /bin/uv
 # https://docs.astral.sh/uv/guides/integration/docker/#intermediate-layers
@@ -35,7 +35,7 @@
 ENV WHISPER__MODEL=Systran/faster-whisper-large-v3
 ENV UVICORN_HOST=0.0.0.0
 ENV UVICORN_PORT=8000
-ENV PATH="$HOME/faster-whisper-server/.venv/bin:$PATH"
+ENV PATH="$HOME/speaches/.venv/bin:$PATH"
 # https://huggingface.co/docs/huggingface_hub/en/package_reference/environment_variables#hfhubenablehftransfer
 # NOTE: I've disabled this because it doesn't work inside of a Docker container. I couldn't pinpoint the exact reason. This doesn't happen when running the server locally.
 # RuntimeError: An error occurred while downloading using `hf_transfer`. Consider disabling HF_HUB_ENABLE_HF_TRANSFER for better error handling.
@@ -44,4 +44,4 @@
 # https://www.reddit.com/r/StableDiffusion/comments/1f6asvd/gradio_sends_ip_address_telemetry_by_default/
 ENV DO_NOT_TRACK=1
 EXPOSE 8000
-CMD ["uvicorn", "--factory", "faster_whisper_server.main:create_app"]
+CMD ["uvicorn", "--factory", "speaches.main:create_app"]
--- README.md
+++ README.md
@@ -1,11 +1,15 @@
-# Faster Whisper Server
+> [!NOTE]
+> This project was previously named `faster-whisper-server`. I've decided to change the name, as the project has evolved to support more than just transcription.
 
-`faster-whisper-server` is an OpenAI API-compatible transcription server which uses [faster-whisper](https://github.com/SYSTRAN/faster-whisper) as its backend.
+# Speaches
+
+`speaches` is an OpenAI API-compatible server supporting transcription, translation, and speech generation. For transcription/translation it uses [faster-whisper](https://github.com/SYSTRAN/faster-whisper), and for text-to-speech it uses [piper](https://github.com/rhasspy/piper).
+
 Features:
 
 - GPU and CPU support.
 - Easily deployable using Docker.
-- **Configurable through environment variables (see [config.py](./src/faster_whisper_server/config.py))**.
+- **Configurable through environment variables (see [config.py](./src/speaches/config.py))**.
 - OpenAI API compatible.
 - Streaming support (transcription is sent via [SSE](https://en.wikipedia.org/wiki/Server-sent_events) as the audio is transcribed; you don't need to wait for the audio to be fully transcribed before receiving it).
 - Live transcription support (audio is sent via websocket as it's generated).
@@ -18,7 +22,7 @@
 See the [OpenAI API reference](https://platform.openai.com/docs/api-reference/audio) for more information.
 
 - Audio file transcription via the `POST /v1/audio/transcriptions` endpoint.
-  - Unlike OpenAI's API, `faster-whisper-server` also supports streaming transcriptions (and translations). This is useful when you want to process large audio files and would rather receive the transcription in chunks as they are processed, rather than waiting for the whole file to be transcribed. It works similarly to chat messages when chatting with LLMs.
+  - Unlike OpenAI's API, `speaches` also supports streaming transcriptions (and translations). This is useful when you want to process large audio files and would rather receive the transcription in chunks as they are processed, rather than waiting for the whole file to be transcribed. It works similarly to chat messages when chatting with LLMs.
 - Audio file translation via the `POST /v1/audio/translations` endpoint.
 - Live audio transcription via the `WS /v1/audio/transcriptions` endpoint.
   - The LocalAgreement2 ([paper](https://aclanthology.org/2023.ijcnlp-demo.3.pdf) | [original implementation](https://github.com/ufal/whisper_streaming)) algorithm is used for live transcription.
@@ -35,13 +39,13 @@
 NOTE: I'm using newer Docker Compose features. If you are using an older version of Docker Compose, you may need to update.
 
 ```bash
-curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.yaml
+curl --silent --remote-name https://raw.githubusercontent.com/speaches-ai/speaches/master/compose.yaml
 
 # for GPU support
-curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.cuda.yaml
+curl --silent --remote-name https://raw.githubusercontent.com/speaches-ai/speaches/master/compose.cuda.yaml
 docker compose --file compose.cuda.yaml up --detach
 # for CPU only (use this if you don't have a GPU, as the image is much smaller)
-curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.cpu.yaml
+curl --silent --remote-name https://raw.githubusercontent.com/speaches-ai/speaches/master/compose.cpu.yaml
 docker compose --file compose.cpu.yaml up --detach
 ```
 
@@ -49,9 +53,9 @@
 
 ```bash
 # for GPU support
-docker run --gpus=all --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --detach fedirz/faster-whisper-server:latest-cuda
+docker run --gpus=all --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --detach ghcr.io/speaches-ai/speaches:latest-cuda
 # for CPU only (use this if you don't have a GPU, as the image is much smaller)
-docker run --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=Systran/faster-whisper-small --detach fedirz/faster-whisper-server:latest-cpu
+docker run --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=Systran/faster-whisper-small --detach ghcr.io/speaches-ai/speaches:latest-cpu
 ```
 
 ### Using Kubernetes
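The README's streaming feature delivers transcription chunks as SSE `data:` events. The sketch below (not part of this commit) shows how such a stream could be parsed client-side; the sample payloads are hypothetical, modeled on the `CreateTranscriptionResponseJson` shape (`{"text": ...}`) from the project's OpenAPI schema, and the real wire format depends on the requested `response_format`.

```python
import json

def parse_sse_transcription(raw: str) -> str:
    """Concatenate the `text` field of every SSE `data:` event in `raw`."""
    chunks = []
    for line in raw.splitlines():
        if line.startswith("data:"):
            # Strip the "data:" prefix and decode the JSON payload.
            payload = json.loads(line[len("data:"):].strip())
            chunks.append(payload["text"])
    return "".join(chunks)

# Hypothetical SSE response body with two events.
body = 'data: {"text": "Hello"}\n\ndata: {"text": " world"}\n'
print(parse_sse_transcription(body))  # prints: Hello world
```

In practice an HTTP client would feed the response to this incrementally, emitting each chunk as it arrives rather than buffering the whole body.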
--- Taskfile.yaml
+++ Taskfile.yaml
@@ -2,8 +2,8 @@
 tasks:
   server:
     cmds:
-      - pkill --signal SIGKILL --echo --full 'uvicorn --factory --host 0.0.0.0 faster_whisper_server.main:create_app' || true
-      - opentelemetry-instrument uvicorn --factory --host 0.0.0.0 faster_whisper_server.main:create_app {{.CLI_ARGS}}
+      - pkill --signal SIGKILL --echo --full 'uvicorn --factory --host 0.0.0.0 speaches.main:create_app' || true
+      - opentelemetry-instrument uvicorn --factory --host 0.0.0.0 speaches.main:create_app {{.CLI_ARGS}}
     sources:
      - src/**/*.py
   test:
--- compose.cpu.yaml
+++ compose.cpu.yaml
@@ -1,11 +1,11 @@
 # include:
 #   - compose.observability.yaml
 services:
-  faster-whisper-server:
+  speaches:
     extends:
       file: compose.yaml
-      service: faster-whisper-server
-    image: fedirz/faster-whisper-server:latest-cpu
+      service: speaches
+    image: ghcr.io/speaches-ai/speaches:latest-cpu
     build:
       args:
         BASE_IMAGE: ubuntu:24.04
--- compose.cuda-cdi.yaml
+++ compose.cuda-cdi.yaml
@@ -4,10 +4,10 @@
 # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html
 # https://docs.docker.com/reference/cli/dockerd/#enable-cdi-devices
 services:
-  faster-whisper-server:
+  speaches:
     extends:
       file: compose.cuda.yaml
-      service: faster-whisper-server
+      service: speaches
     volumes:
       - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
     deploy:
--- compose.cuda.yaml
+++ compose.cuda.yaml
@@ -1,11 +1,11 @@
 # include:
 #   - compose.observability.yaml
 services:
-  faster-whisper-server:
+  speaches:
     extends:
       file: compose.yaml
-      service: faster-whisper-server
-    image: fedirz/faster-whisper-server:latest-cuda
+      service: speaches
+    image: ghcr.io/speaches-ai/speaches:latest-cuda
    build:
       args:
         BASE_IMAGE: nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
--- compose.observability.yaml
+++ compose.observability.yaml
@@ -5,7 +5,7 @@
     volumes:
       - ./configuration/opentelemetry-collector.yaml:/etc/opentelemetry-collector.yaml
     ports:
-      # NOTE: when `faster-whisper-server` is also running as a Docker Compose service, this doesn't need to be exposed.
+      # NOTE: when `speaches` is also running as a Docker Compose service, this doesn't need to be exposed.
       - 4317:4317 # OTLP gRPC receiver
       # - 4318:4318 # OTLP HTTP receiver
       # - 8888:8888 # Prometheus metrics exposed by the Collector
--- compose.yaml
+++ compose.yaml
@@ -1,7 +1,7 @@
 # TODO: https://docs.astral.sh/uv/guides/integration/docker/#configuring-watch-with-docker-compose
 services:
-  faster-whisper-server:
-    container_name: faster-whisper-server
+  speaches:
+    container_name: speaches
     build:
       dockerfile: Dockerfile
       context: .
--- docs/configuration.md
+++ docs/configuration.md
@@ -1,5 +1,5 @@
 <!-- https://mkdocstrings.github.io/python/usage/configuration/general/ -->
-::: faster_whisper_server.config.Config
+::: speaches.config.Config
     options:
       show_bases: true
       show_if_no_docstring: true
@@ -16,7 +16,7 @@
       - "!speech_*"
       - "!transcription_*"
 
-::: faster_whisper_server.config.WhisperConfig
+::: speaches.config.WhisperConfig
 
 <!-- TODO: nested model `whisper` -->
 <!-- TODO: Insert new lines for multi-line docstrings -->
--- docs/installation.md
+++ docs/installation.md
@@ -9,25 +9,25 @@
 === "CUDA"
 
     ```bash
-    curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.yaml
-    curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.cuda.yaml
+    curl --silent --remote-name https://raw.githubusercontent.com/speaches-ai/speaches/master/compose.yaml
+    curl --silent --remote-name https://raw.githubusercontent.com/speaches-ai/speaches/master/compose.cuda.yaml
     export COMPOSE_FILE=compose.cuda.yaml
     ```
 
 === "CUDA (with CDI feature enabled)"
 
     ```bash
-    curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.yaml
-    curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.cuda.yaml
-    curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.cuda-cdi.yaml
+    curl --silent --remote-name https://raw.githubusercontent.com/speaches-ai/speaches/master/compose.yaml
+    curl --silent --remote-name https://raw.githubusercontent.com/speaches-ai/speaches/master/compose.cuda.yaml
+    curl --silent --remote-name https://raw.githubusercontent.com/speaches-ai/speaches/master/compose.cuda-cdi.yaml
     export COMPOSE_FILE=compose.cuda-cdi.yaml
    ```
 
 === "CPU"
 
     ```bash
-    curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.yaml
-    curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.cpu.yaml
+    curl --silent --remote-name https://raw.githubusercontent.com/speaches-ai/speaches/master/compose.yaml
+    curl --silent --remote-name https://raw.githubusercontent.com/speaches-ai/speaches/master/compose.cpu.yaml
     export COMPOSE_FILE=compose.cpu.yaml
     ```
 
@@ -58,10 +58,10 @@
       --rm \
       --detach \
       --publish 8000:8000 \
-      --name faster-whisper-server \
+      --name speaches \
       --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub \
       --gpus=all \
-      fedirz/faster-whisper-server:latest-cuda
+      ghcr.io/speaches-ai/speaches:latest-cuda
     ```
 
 === "CUDA (with CDI feature enabled)"
@@ -71,10 +71,10 @@
       --rm \
       --detach \
       --publish 8000:8000 \
-      --name faster-whisper-server \
+      --name speaches \
       --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub \
       --device=nvidia.com/gpu=all \
-      fedirz/faster-whisper-server:latest-cuda
+      ghcr.io/speaches-ai/speaches:latest-cuda
     ```
 
 === "CPU"
@@ -84,31 +84,31 @@
       --rm \
       --detach \
       --publish 8000:8000 \
-      --name faster-whisper-server \
+      --name speaches \
       --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub \
-      fedirz/faster-whisper-server:latest-cpu
+      ghcr.io/speaches-ai/speaches:latest-cpu
     ```
 
 ??? note "Build from source"
 
     ```bash
-    docker build --tag faster-whisper-server .
+    docker build --tag speaches .
 
     # NOTE: you need to install and enable [buildx](https://github.com/docker/buildx) for multi-platform builds
     # Build an image for both amd64 and arm64
-    docker buildx build --tag faster-whisper-server --platform linux/amd64,linux/arm64 .
+    docker buildx build --tag speaches --platform linux/amd64,linux/arm64 .
 
     # Build an image without CUDA support
-    docker build --tag faster-whisper-server --build-arg BASE_IMAGE=ubuntu:24.04 .
+    docker build --tag speaches --build-arg BASE_IMAGE=ubuntu:24.04 .
     ```
 
 ## Python (requires Python 3.12+ and the `uv` package manager)
 
 ```bash
-git clone https://github.com/fedirz/faster-whisper-server.git
-cd faster-whisper-server
+git clone https://github.com/speaches-ai/speaches.git
+cd speaches
 uv venv
 source .venv/bin/activate
 uv sync --all-extras
-uvicorn --factory --host 0.0.0.0 faster_whisper_server.main:create_app
+uvicorn --factory --host 0.0.0.0 speaches.main:create_app
 ```
--- docs/introduction.md
+++ docs/introduction.md
@@ -8,19 +8,20 @@
 
 TODO: add HuggingFace Space URL
 
-# Faster Whisper Server
+# Speaches
 
-`faster-whisper-server` is an OpenAI API-compatible server supporting transcription, translation, and speech generation. For transcription/translation it uses [faster-whisper](https://github.com/SYSTRAN/faster-whisper), and for text-to-speech it uses [piper](https://github.com/rhasspy/piper).
+`speaches` is an OpenAI API-compatible server supporting transcription, translation, and speech generation. For transcription/translation it uses [faster-whisper](https://github.com/SYSTRAN/faster-whisper), and for text-to-speech it uses [piper](https://github.com/rhasspy/piper).
 
 ## Features:
 
 - GPU and CPU support.
 - [Deployable via Docker Compose / Docker](./installation.md)
 - [Highly configurable](./configuration.md)
-- OpenAI API compatible. All tools and SDKs that work with OpenAI's API should work with `faster-whisper-server`.
+- OpenAI API compatible. All tools and SDKs that work with OpenAI's API should work with `speaches`.
 - Streaming support (transcription is sent via [SSE](https://en.wikipedia.org/wiki/Server-sent_events) as the audio is transcribed; you don't need to wait for the audio to be fully transcribed before receiving it).
 - Live transcription support (audio is sent via websocket as it's generated).
 - Dynamic model loading / offloading. Just specify which model you want to use in the request and it will be loaded automatically. It will then be unloaded after a period of inactivity.
+- Text-to-speech (TTS) via `piper`.
 - (Coming soon) Audio generation (chat completions endpoint) | [OpenAI Documentation](https://platform.openai.com/docs/guides/realtime)
   - Generate a spoken audio summary of a body of text (text in, audio out)
   - Perform sentiment analysis on a recording (audio in, text out)
@@ -34,7 +35,7 @@
 See the [OpenAI API reference](https://platform.openai.com/docs/api-reference/audio) for more information.
 
 - Audio file transcription via the `POST /v1/audio/transcriptions` endpoint.
-  - Unlike OpenAI's API, `faster-whisper-server` also supports streaming transcriptions (and translations). This is useful when you want to process large audio files and would rather receive the transcription in chunks as they are processed, rather than waiting for the whole file to be transcribed. It works similarly to chat messages when chatting with LLMs.
+  - Unlike OpenAI's API, `speaches` also supports streaming transcriptions (and translations). This is useful when you want to process large audio files and would rather receive the transcription in chunks as they are processed, rather than waiting for the whole file to be transcribed. It works similarly to chat messages when chatting with LLMs.
 - Audio file translation via the `POST /v1/audio/translations` endpoint.
 - Live audio transcription via the `WS /v1/audio/transcriptions` endpoint.
   - The LocalAgreement2 ([paper](https://aclanthology.org/2023.ijcnlp-demo.3.pdf) | [original implementation](https://github.com/ufal/whisper_streaming)) algorithm is used for live transcription.
--- docs/openapi.json
+++ docs/openapi.json
@@ -1,1 +1,1 @@
-{"openapi":"3.1.0","info":{"title":"FastAPI","version":"0.1.0"},"paths":{"/v1/audio/translations":{"post":{"tags":["automatic-speech-recognition"],"summary":"Translate File","operationId":"translate_file_v1_audio_translations_post","requestBody":{"content":{"application/x-www-form-urlencoded":{"schema":{"$ref":"#/components/schemas/Body_translate_file_v1_audio_translations_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"anyOf":[{"type":"string"},{"$ref":"#/components/schemas/CreateTranscriptionResponseJson"},{"$ref":"#/components/schemas/CreateTranscriptionResponseVerboseJson"}],"title":"Response Translate File V1 Audio Translations Post"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/audio/transcriptions":{"post":{"tags":["automatic-speech-recognition"],"summary":"Transcribe File","operationId":"transcribe_file_v1_audio_transcriptions_post","requestBody":{"content":{"application/x-www-form-urlencoded":{"schema":{"$ref":"#/components/schemas/Body_transcribe_file_v1_audio_transcriptions_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"anyOf":[{"type":"string"},{"$ref":"#/components/schemas/CreateTranscriptionResponseJson"},{"$ref":"#/components/schemas/CreateTranscriptionResponseVerboseJson"}],"title":"Response Transcribe File V1 Audio Transcriptions Post"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/models":{"get":{"tags":["models"],"summary":"Get Models","operationId":"get_models_v1_models_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListModelsResponse"}}}}}}},"/v1/models/{model_name}":{"get":{"tags":["models"],"summary":"Get Model","operationId":"get_model_v1_models__model_name__get","parameters":[{"name":"model_name","in":"path","required":true,"schema":{"type":"string","title":"Model Name"},"example":"Systran/faster-distil-whisper-large-v3"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Model"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/health":{"get":{"tags":["diagnostic"],"summary":"Health","operationId":"health_health_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/api/pull/{model_name}":{"post":{"tags":["experimental"],"summary":"Download a model from Hugging Face.","operationId":"pull_model_api_pull__model_name__post","parameters":[{"name":"model_name","in":"path","required":true,"schema":{"type":"string","title":"Model Name"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/ps":{"get":{"tags":["experimental"],"summary":"Get a list of loaded models.","operationId":"get_running_models_api_ps_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"additionalProperties":{"items":{"type":"string"},"type":"array"},"type":"object","title":"Response Get Running Models Api Ps Get"}}}}}}},"/api/ps/{model_name}":{"post":{"tags":["experimental"],"summary":"Load a model into memory.","operationId":"load_model_route_api_ps__model_name__post","parameters":[{"name":"model_name","in":"path","required":true,"schema":{"type":"string","title":"Model Name"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["experimental"],"summary":"Unload a model from memory.","operationId":"stop_running_model_api_ps__model_name__delete","parameters":[{"name":"model_name","in":"path","required":true,"schema":{"type":"string","title":"Model Name"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/audio/speech":{"post":{"tags":["speech-to-text"],"summary":"Synthesize","operationId":"synthesize_v1_audio_speech_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateSpeechRequestBody"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/audio/speech/voices":{"get":{"tags":["speech-to-text"],"summary":"List Voices","operationId":"list_voices_v1_audio_speech_voices_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"items":{"$ref":"#/components/schemas/PiperModel"},"type":"array","title":"Response List Voices V1 Audio Speech Voices Get"}}}}}}}},"components":{"schemas":{"Body_transcribe_file_v1_audio_transcriptions_post":{"properties":{"model":{"anyOf":[{"type":"string","description":"The ID of the model. You can get a list of available models by calling `/v1/models`.","examples":["Systran/faster-distil-whisper-large-v3","bofenghuang/whisper-large-v2-cv11-french-ct2"]},{"type":"null"}],"title":"Model"},"language":{"anyOf":[{"$ref":"#/components/schemas/Language"},{"type":"null"}]},"prompt":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Prompt"},"response_format":{"anyOf":[{"$ref":"#/components/schemas/faster_whisper_server__config__ResponseFormat"},{"type":"null"}]},"temperature":{"type":"number","title":"Temperature","default":0.0},"timestamp_granularities":{"items":{"type":"string","enum":["segment","word"]},"type":"array","title":"Timestamp Granularities","default":["segment"]},"stream":{"type":"boolean","title":"Stream","default":false},"hotwords":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hotwords"},"vad_filter":{"type":"boolean","title":"Vad Filter","default":false},"file":{"type":"string","format":"binary","title":"File"}},"type":"object","required":["file"],"title":"Body_transcribe_file_v1_audio_transcriptions_post"},"Body_translate_file_v1_audio_translations_post":{"properties":{"model":{"anyOf":[{"type":"string","description":"The ID of the model. You can get a list of available models by calling `/v1/models`.","examples":["Systran/faster-distil-whisper-large-v3","bofenghuang/whisper-large-v2-cv11-french-ct2"]},{"type":"null"}],"title":"Model"},"prompt":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Prompt"},"response_format":{"anyOf":[{"$ref":"#/components/schemas/faster_whisper_server__config__ResponseFormat"},{"type":"null"}]},"temperature":{"type":"number","title":"Temperature","default":0.0},"stream":{"type":"boolean","title":"Stream","default":false},"vad_filter":{"type":"boolean","title":"Vad Filter","default":false},"file":{"type":"string","format":"binary","title":"File"}},"type":"object","required":["file"],"title":"Body_translate_file_v1_audio_translations_post"},"CreateSpeechRequestBody":{"properties":{"model":{"type":"string","enum":["piper"],"const":"piper","title":"Model","description":"The ID of the model. The only supported model is 'piper'.","default":"piper","examples":["piper"]},"input":{"type":"string","title":"Input","description":"The text to generate audio for. ","examples":["A rainbow is an optical phenomenon caused by refraction, internal reflection and dispersion of light in water droplets resulting in a continuous spectrum of light appearing in the sky. The rainbow takes the form of a multicoloured circular arc. Rainbows caused by sunlight always appear in the section of sky directly opposite the Sun. Rainbows can be caused by many forms of airborne water. These include not only rain, but also mist, spray, and airborne dew."]},"voice":{"type":"string","title":"Voice","default":"en_US-amy-medium"},"response_format":{"$ref":"#/components/schemas/faster_whisper_server__routers__speech__ResponseFormat","description":"The format to audio in. Supported formats are mp3, flac, wav, pcm. opus, aac are not supported","default":"mp3","examples":["mp3","flac","wav","pcm"]},"speed":{"type":"number","maximum":4.0,"minimum":0.25,"title":"Speed","default":1.0},"sample_rate":{"anyOf":[{"type":"integer","maximum":48000.0,"minimum":8000.0},{"type":"null"}],"title":"Sample Rate"}},"type":"object","required":["input"],"title":"CreateSpeechRequestBody"},"CreateTranscriptionResponseJson":{"properties":{"text":{"type":"string","title":"Text"}},"type":"object","required":["text"],"title":"CreateTranscriptionResponseJson"},"CreateTranscriptionResponseVerboseJson":{"properties":{"task":{"type":"string","title":"Task","default":"transcribe"},"language":{"type":"string","title":"Language"},"duration":{"type":"number","title":"Duration"},"text":{"type":"string","title":"Text"},"words":{"anyOf":[{"items":{"$ref":"#/components/schemas/TranscriptionWord"},"type":"array"},{"type":"null"}],"title":"Words"},"segments":{"items":{"$ref":"#/components/schemas/TranscriptionSegment"},"type":"array","title":"Segments"}},"type":"object","required":["language","duration","text","words","segments"],"title":"CreateTranscriptionResponseVerboseJson"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"Language":{"type":"string","enum":["af","am","ar","as","az","ba","be","bg","bn","bo","br","bs","ca","cs","cy","da","de","el","en","es","et","eu","fa","fi","fo","fr","gl","gu","ha","haw","he","hi","hr","ht","hu","hy","id","is","it","ja","jw","ka","kk","km","kn","ko","la","lb","ln","lo","lt","lv","mg","mi","mk","ml","mn","mr","ms","mt","my","ne","nl","nn","no","oc","pa","pl","ps","pt","ro","ru","sa","sd","si","sk","sl","sn","so","sq","sr","su","sv","sw","ta","te","tg","th","tk","tl","tr","tt","uk","ur","uz","vi","yi","yo","yue","zh"],"title":"Language"},"ListModelsResponse":{"properties":{"data":{"items":{"$ref":"#/components/schemas/Model"},"type":"array","title":"Data"},"object":{"type":"string","enum":["list"],"const":"list","title":"Object","default":"list"}},"type":"object","required":["data"],"title":"ListModelsResponse"},"Model":{"properties":{"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"},"object":{"type":"string","enum":["model"],"const":"model","title":"Object"},"owned_by":{"type":"string","title":"Owned By"},"language":{"items":{"type":"string"},"type":"array","title":"Language"}},"type":"object","required":["id","created","object","owned_by"],"title":"Model","examples":[{"created":1700732060,"id":"Systran/faster-whisper-large-v3","object":"model","owned_by":"Systran"},{"created":1711378296,"id":"Systran/faster-distil-whisper-large-v3","object":"model","owned_by":"Systran"},{"created":1687968011,"id":"bofenghuang/whisper-large-v2-cv11-french-ct2","object":"model","owned_by":"bofenghuang"}]},"PiperModel":{"properties":{"object":{"type":"string","enum":["model"],"const":"model","title":"Object","default":"model"},"created":{"type":"integer","title":"Created"},"owned_by":{"type":"string","enum":["rhasspy"],"const":"rhasspy","title":"Owned By","default":"rhasspy"},"model_path":{"type":"string","format":"path","title":"Model Path","examples":["/home/nixos/.cache/huggingface/hub/models--rhasspy--piper-voices/snapshots/3d796cc2f2c884b3517c527507e084f7bb245aea/en/en_US/amy/medium/en_US-amy-medium.onnx"]},"id":{"type":"string","title":"Id","readOnly":true,"examples":["rhasspy/piper-voices/en_US-amy-medium"]},"voice":{"type":"string","title":"Voice","readOnly":true,"examples":["rhasspy/piper-voices/en_US-amy-medium"]},"config_path":{"type":"string","format":"path","title":"Config Path","readOnly":true},"quality":{"type":"string","enum":["x_low","low","medium","high"],"title":"Quality","readOnly":true},"sample_rate":{"type":"integer","title":"Sample Rate","readOnly":true}},"type":"object","required":["created","model_path","id","voice","config_path","quality","sample_rate"],"title":"PiperModel","description":"Similar structure to the GET /v1/models response but with extra fields."},"TranscriptionSegment":{"properties":{"id":{"type":"integer","title":"Id"},"seek":{"type":"integer","title":"Seek"},"start":{"type":"number","title":"Start"},"end":{"type":"number","title":"End"},"text":{"type":"string","title":"Text"},"tokens":{"items":{"type":"integer"},"type":"array","title":"Tokens"},"temperature":{"type":"number","title":"Temperature"},"avg_logprob":{"type":"number","title":"Avg Logprob"},"compression_ratio":{"type":"number","title":"Compression Ratio"},"no_speech_prob":{"type":"number","title":"No Speech Prob"},"words":{"anyOf":[{"items":{"$ref":"#/components/schemas/TranscriptionWord"},"type":"array"},{"type":"null"}],"title":"Words"}},"type":"object","required":["id","seek","start","end","text","tokens","temperature","avg_logprob","compression_ratio","no_speech_prob","words"],"title":"TranscriptionSegment"},"TranscriptionWord":{"properties":{"start":{"type":"number","title":"Start"},"end":{"type":"number","title":"End"},"word":{"type":"string","title":"Word"},"probability":{"type":"number","title":"Probability"}},"type":"object","required":["start","end","word","probability"],"title":"TranscriptionWord"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"faster_whisper_server__config__ResponseFormat":{"type":"string","enum":["text","json","verbose_json","srt","vtt"],"title":"ResponseFormat"},"faster_whisper_server__routers__speech__ResponseFormat":{"type":"string","enum":["mp3","flac","wav","pcm"]}}},"tags":[{"name":"automatic-speech-recognition"},{"name":"speech-to-text"},{"name":"models"},{"name":"diagnostic"},{"name":"experimental","description":"Not meant for public use yet. May change or be removed at any time."}]}
|
1 |
+{"openapi":"3.1.0","info":{"title":"FastAPI","version":"0.1.0"},"paths":{"/v1/audio/translations":{"post":{"tags":["automatic-speech-recognition"],"summary":"Translate File","operationId":"translate_file_v1_audio_translations_post","requestBody":{"content":{"application/x-www-form-urlencoded":{"schema":{"$ref":"#/components/schemas/Body_translate_file_v1_audio_translations_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"anyOf":[{"type":"string"},{"$ref":"#/components/schemas/CreateTranscriptionResponseJson"},{"$ref":"#/components/schemas/CreateTranscriptionResponseVerboseJson"}],"title":"Response Translate File V1 Audio Translations Post"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/audio/transcriptions":{"post":{"tags":["automatic-speech-recognition"],"summary":"Transcribe File","operationId":"transcribe_file_v1_audio_transcriptions_post","requestBody":{"content":{"application/x-www-form-urlencoded":{"schema":{"$ref":"#/components/schemas/Body_transcribe_file_v1_audio_transcriptions_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"anyOf":[{"type":"string"},{"$ref":"#/components/schemas/CreateTranscriptionResponseJson"},{"$ref":"#/components/schemas/CreateTranscriptionResponseVerboseJson"}],"title":"Response Transcribe File V1 Audio Transcriptions Post"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/models":{"get":{"tags":["models"],"summary":"Get Models","operationId":"get_models_v1_models_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ListModelsResponse"}}}}}}},"/v1/models/{model_name}":{"get":{"tags":["models"],"summary":"Get 
Model","operationId":"get_model_v1_models__model_name__get","parameters":[{"name":"model_name","in":"path","required":true,"schema":{"type":"string","title":"Model Name"},"example":"Systran/faster-distil-whisper-large-v3"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/Model"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/health":{"get":{"tags":["diagnostic"],"summary":"Health","operationId":"health_health_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/api/pull/{model_name}":{"post":{"tags":["experimental"],"summary":"Download a model from Hugging Face.","operationId":"pull_model_api_pull__model_name__post","parameters":[{"name":"model_name","in":"path","required":true,"schema":{"type":"string","title":"Model Name"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/api/ps":{"get":{"tags":["experimental"],"summary":"Get a list of loaded models.","operationId":"get_running_models_api_ps_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"additionalProperties":{"items":{"type":"string"},"type":"array"},"type":"object","title":"Response Get Running Models Api Ps Get"}}}}}}},"/api/ps/{model_name}":{"post":{"tags":["experimental"],"summary":"Load a model into memory.","operationId":"load_model_route_api_ps__model_name__post","parameters":[{"name":"model_name","in":"path","required":true,"schema":{"type":"string","title":"Model Name"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}},"delete":{"tags":["experimental"],"summary":"Unload a model from memory.","operationId":"stop_running_model_api_ps__model_name__delete","parameters":[{"name":"model_name","in":"path","required":true,"schema":{"type":"string","title":"Model Name"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/audio/speech":{"post":{"tags":["speech-to-text"],"summary":"Synthesize","operationId":"synthesize_v1_audio_speech_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/CreateSpeechRequestBody"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v1/audio/speech/voices":{"get":{"tags":["speech-to-text"],"summary":"List Voices","operationId":"list_voices_v1_audio_speech_voices_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"items":{"$ref":"#/components/schemas/PiperModel"},"type":"array","title":"Response List Voices V1 Audio Speech Voices Get"}}}}}}}},"components":{"schemas":{"Body_transcribe_file_v1_audio_transcriptions_post":{"properties":{"model":{"anyOf":[{"type":"string","description":"The ID of the model. 
You can get a list of available models by calling `/v1/models`.","examples":["Systran/faster-distil-whisper-large-v3","bofenghuang/whisper-large-v2-cv11-french-ct2"]},{"type":"null"}],"title":"Model"},"language":{"anyOf":[{"$ref":"#/components/schemas/Language"},{"type":"null"}]},"prompt":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Prompt"},"response_format":{"anyOf":[{"$ref":"#/components/schemas/speaches__config__ResponseFormat"},{"type":"null"}]},"temperature":{"type":"number","title":"Temperature","default":0.0},"timestamp_granularities":{"items":{"type":"string","enum":["segment","word"]},"type":"array","title":"Timestamp Granularities","default":["segment"]},"stream":{"type":"boolean","title":"Stream","default":false},"hotwords":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hotwords"},"vad_filter":{"type":"boolean","title":"Vad Filter","default":false},"file":{"type":"string","format":"binary","title":"File"}},"type":"object","required":["file"],"title":"Body_transcribe_file_v1_audio_transcriptions_post"},"Body_translate_file_v1_audio_translations_post":{"properties":{"model":{"anyOf":[{"type":"string","description":"The ID of the model. 
You can get a list of available models by calling `/v1/models`.","examples":["Systran/faster-distil-whisper-large-v3","bofenghuang/whisper-large-v2-cv11-french-ct2"]},{"type":"null"}],"title":"Model"},"prompt":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Prompt"},"response_format":{"anyOf":[{"$ref":"#/components/schemas/speaches__config__ResponseFormat"},{"type":"null"}]},"temperature":{"type":"number","title":"Temperature","default":0.0},"stream":{"type":"boolean","title":"Stream","default":false},"vad_filter":{"type":"boolean","title":"Vad Filter","default":false},"file":{"type":"string","format":"binary","title":"File"}},"type":"object","required":["file"],"title":"Body_translate_file_v1_audio_translations_post"},"CreateSpeechRequestBody":{"properties":{"model":{"type":"string","enum":["piper"],"const":"piper","title":"Model","description":"The ID of the model. The only supported model is 'piper'.","default":"piper","examples":["piper"]},"input":{"type":"string","title":"Input","description":"The text to generate audio for. ","examples":["A rainbow is an optical phenomenon caused by refraction, internal reflection and dispersion of light in water droplets resulting in a continuous spectrum of light appearing in the sky. The rainbow takes the form of a multicoloured circular arc. Rainbows caused by sunlight always appear in the section of sky directly opposite the Sun. Rainbows can be caused by many forms of airborne water. These include not only rain, but also mist, spray, and airborne dew."]},"voice":{"type":"string","title":"Voice","default":"en_US-amy-medium"},"response_format":{"$ref":"#/components/schemas/speaches__routers__speech__ResponseFormat","description":"The format to audio in. Supported formats are mp3, flac, wav, pcm. 
opus, aac are not supported","default":"mp3","examples":["mp3","flac","wav","pcm"]},"speed":{"type":"number","maximum":4.0,"minimum":0.25,"title":"Speed","default":1.0},"sample_rate":{"anyOf":[{"type":"integer","maximum":48000.0,"minimum":8000.0},{"type":"null"}],"title":"Sample Rate"}},"type":"object","required":["input"],"title":"CreateSpeechRequestBody"},"CreateTranscriptionResponseJson":{"properties":{"text":{"type":"string","title":"Text"}},"type":"object","required":["text"],"title":"CreateTranscriptionResponseJson"},"CreateTranscriptionResponseVerboseJson":{"properties":{"task":{"type":"string","title":"Task","default":"transcribe"},"language":{"type":"string","title":"Language"},"duration":{"type":"number","title":"Duration"},"text":{"type":"string","title":"Text"},"words":{"anyOf":[{"items":{"$ref":"#/components/schemas/TranscriptionWord"},"type":"array"},{"type":"null"}],"title":"Words"},"segments":{"items":{"$ref":"#/components/schemas/TranscriptionSegment"},"type":"array","title":"Segments"}},"type":"object","required":["language","duration","text","words","segments"],"title":"CreateTranscriptionResponseVerboseJson"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"Language":{"type":"string","enum":["af","am","ar","as","az","ba","be","bg","bn","bo","br","bs","ca","cs","cy","da","de","el","en","es","et","eu","fa","fi","fo","fr","gl","gu","ha","haw","he","hi","hr","ht","hu","hy","id","is","it","ja","jw","ka","kk","km","kn","ko","la","lb","ln","lo","lt","lv","mg","mi","mk","ml","mn","mr","ms","mt","my","ne","nl","nn","no","oc","pa","pl","ps","pt","ro","ru","sa","sd","si","sk","sl","sn","so","sq","sr","su","sv","sw","ta","te","tg","th","tk","tl","tr","tt","uk","ur","uz","vi","yi","yo","yue","zh"],"title":"Language"},"ListModelsResponse":{"properties":{"data":{"items":{"$ref":"#/components/schemas/Model"},"type":"array","titl
e":"Data"},"object":{"type":"string","enum":["list"],"const":"list","title":"Object","default":"list"}},"type":"object","required":["data"],"title":"ListModelsResponse"},"Model":{"properties":{"id":{"type":"string","title":"Id"},"created":{"type":"integer","title":"Created"},"object":{"type":"string","enum":["model"],"const":"model","title":"Object"},"owned_by":{"type":"string","title":"Owned By"},"language":{"items":{"type":"string"},"type":"array","title":"Language"}},"type":"object","required":["id","created","object","owned_by"],"title":"Model","examples":[{"created":1700732060,"id":"Systran/faster-whisper-large-v3","object":"model","owned_by":"Systran"},{"created":1711378296,"id":"Systran/faster-distil-whisper-large-v3","object":"model","owned_by":"Systran"},{"created":1687968011,"id":"bofenghuang/whisper-large-v2-cv11-french-ct2","object":"model","owned_by":"bofenghuang"}]},"PiperModel":{"properties":{"object":{"type":"string","enum":["model"],"const":"model","title":"Object","default":"model"},"created":{"type":"integer","title":"Created"},"owned_by":{"type":"string","enum":["rhasspy"],"const":"rhasspy","title":"Owned By","default":"rhasspy"},"model_path":{"type":"string","format":"path","title":"Model Path","examples":["/home/nixos/.cache/huggingface/hub/models--rhasspy--piper-voices/snapshots/3d796cc2f2c884b3517c527507e084f7bb245aea/en/en_US/amy/medium/en_US-amy-medium.onnx"]},"id":{"type":"string","title":"Id","readOnly":true,"examples":["rhasspy/piper-voices/en_US-amy-medium"]},"voice":{"type":"string","title":"Voice","readOnly":true,"examples":["rhasspy/piper-voices/en_US-amy-medium"]},"config_path":{"type":"string","format":"path","title":"Config Path","readOnly":true},"quality":{"type":"string","enum":["x_low","low","medium","high"],"title":"Quality","readOnly":true},"sample_rate":{"type":"integer","title":"Sample 
Rate","readOnly":true}},"type":"object","required":["created","model_path","id","voice","config_path","quality","sample_rate"],"title":"PiperModel","description":"Similar structure to the GET /v1/models response but with extra fields."},"TranscriptionSegment":{"properties":{"id":{"type":"integer","title":"Id"},"seek":{"type":"integer","title":"Seek"},"start":{"type":"number","title":"Start"},"end":{"type":"number","title":"End"},"text":{"type":"string","title":"Text"},"tokens":{"items":{"type":"integer"},"type":"array","title":"Tokens"},"temperature":{"type":"number","title":"Temperature"},"avg_logprob":{"type":"number","title":"Avg Logprob"},"compression_ratio":{"type":"number","title":"Compression Ratio"},"no_speech_prob":{"type":"number","title":"No Speech Prob"},"words":{"anyOf":[{"items":{"$ref":"#/components/schemas/TranscriptionWord"},"type":"array"},{"type":"null"}],"title":"Words"}},"type":"object","required":["id","seek","start","end","text","tokens","temperature","avg_logprob","compression_ratio","no_speech_prob","words"],"title":"TranscriptionSegment"},"TranscriptionWord":{"properties":{"start":{"type":"number","title":"Start"},"end":{"type":"number","title":"End"},"word":{"type":"string","title":"Word"},"probability":{"type":"number","title":"Probability"}},"type":"object","required":["start","end","word","probability"],"title":"TranscriptionWord"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error 
Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"speaches__config__ResponseFormat":{"type":"string","enum":["text","json","verbose_json","srt","vtt"],"title":"ResponseFormat"},"speaches__routers__speech__ResponseFormat":{"type":"string","enum":["mp3","flac","wav","pcm"]}}},"tags":[{"name":"automatic-speech-recognition"},{"name":"speech-to-text"},{"name":"models"},{"name":"diagnostic"},{"name":"experimental","description":"Not meant for public use yet. May change or be removed at any time."}]} |
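The rename also changes the generated schema keys (`faster_whisper_server__config__ResponseFormat` becomes `speaches__config__ResponseFormat`), so any client that hard-coded a `$ref` string must be updated. A minimal sketch of resolving such a local reference; the `spec` dict below is an abbreviated stand-in for the real `openapi.json`, not the full document:

```python
# Abbreviated fragment of the spec above; only the renamed schema is shown.
spec = {
    "components": {
        "schemas": {
            "speaches__config__ResponseFormat": {
                "type": "string",
                "enum": ["text", "json", "verbose_json", "srt", "vtt"],
            }
        }
    }
}

def resolve_ref(spec: dict, ref: str) -> dict:
    """Follow a local '#/components/schemas/...' JSON reference."""
    assert ref.startswith("#/"), "only local references are handled here"
    node = spec
    for part in ref.removeprefix("#/").split("/"):
        node = node[part]
    return node

schema = resolve_ref(spec, "#/components/schemas/speaches__config__ResponseFormat")
```

Clients that resolved `#/components/schemas/faster_whisper_server__config__ResponseFormat` will now get a `KeyError` and need the new name.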
--- docs/usage/open-webui-integration.md
+++ docs/usage/open-webui-integration.md
... | ... | @@ -6,7 +6,7 @@ |
6 | 6 |
2. Click on the "Audio" tab |
7 | 7 |
3. Update settings |
8 | 8 |
- Speech-to-Text Engine: OpenAI |
9 |
- - API Base URL: http://faster-whisper-server:8000/v1 |
|
9 |
+ - API Base URL: http://speaches:8000/v1 |
|
10 | 10 |
- API Key: does-not-matter-what-you-put-but-should-not-be-empty |
11 | 11 |
- Model: Systran/faster-distil-whisper-large-v3 |
12 | 12 |
4. Click "Save" |
... | ... | @@ -27,10 +27,10 @@ |
27 | 27 |
... |
28 | 28 |
# Environment variables are documented here https://docs.openwebui.com/getting-started/env-configuration#speech-to-text |
29 | 29 |
AUDIO_STT_ENGINE: "openai" |
30 |
- AUDIO_STT_OPENAI_API_BASE_URL: "http://faster-whisper-server:8000/v1" |
|
30 |
+ AUDIO_STT_OPENAI_API_BASE_URL: "http://speaches:8000/v1" |
|
31 | 31 |
AUDIO_STT_OPENAI_API_KEY: "does-not-matter-what-you-put-but-should-not-be-empty" |
32 | 32 |
AUDIO_STT_MODEL: "Systran/faster-distil-whisper-large-v3" |
33 |
- faster-whisper-server: |
|
34 |
- image: fedirz/faster-whisper-server:latest-cuda |
|
33 |
+ speaches: |
|
34 |
+ image: ghcr.io/speaches-ai/speaches:latest-cuda |
|
35 | 35 |
... |
36 | 36 |
``` |
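Both the UI setting and the `AUDIO_STT_OPENAI_API_BASE_URL` variable above reach the renamed service by its Compose service name. A small sketch of how that base URL is composed; the helper name is illustrative, not part of Open WebUI or speaches:

```python
from urllib.parse import urlparse

def stt_base_url(service: str = "speaches", port: int = 8000) -> str:
    """Build the OpenAI-compatible base URL Open WebUI expects (note the /v1 prefix)."""
    return f"http://{service}:{port}/v1"

url = stt_base_url()
# The hostname must match the Compose service name, not localhost,
# since Open WebUI resolves it over the Compose network.
host = urlparse(url).hostname
```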
--- docs/usage/text-to-speech.md
+++ docs/usage/text-to-speech.md
... | ... | @@ -2,7 +2,6 @@ |
2 | 2 |
|
3 | 3 |
This feature is not supported on ARM devices, only x86_64, as I was unable to build [piper-phonemize](https://github.com/rhasspy/piper-phonemize) (my [fork](https://github.com/fedirz/piper-phonemize)) for ARM. |
4 | 4 |
|
5 |
-http://localhost:8001/faster-whisper-server/api/ |
|
6 | 5 |
TODO: add a note about automatic downloads |
7 | 6 |
TODO: add a demo |
8 | 7 |
TODO: add a note about tts only running on cpu |
... | ... | @@ -19,13 +18,13 @@ |
19 | 18 |
|
20 | 19 |
```bash |
21 | 20 |
# Download all voices (~15 minutes / 7.7 GB) |
22 |
-docker exec -it faster-whisper-server huggingface-cli download rhasspy/piper-voices |
|
21 |
+docker exec -it speaches huggingface-cli download rhasspy/piper-voices |
|
23 | 22 |
# Download all English voices (~4.5 minutes) |
24 |
-docker exec -it faster-whisper-server huggingface-cli download rhasspy/piper-voices --include 'en/**/*' 'voices.json' |
|
23 |
+docker exec -it speaches huggingface-cli download rhasspy/piper-voices --include 'en/**/*' 'voices.json' |
|
25 | 24 |
# Download all qualities of a specific voice (~4 seconds) |
26 |
-docker exec -it faster-whisper-server huggingface-cli download rhasspy/piper-voices --include 'en/en_US/amy/**/*' 'voices.json' |
|
25 |
+docker exec -it speaches huggingface-cli download rhasspy/piper-voices --include 'en/en_US/amy/**/*' 'voices.json' |
|
27 | 26 |
# Download specific quality of a specific voice (~2 seconds) |
28 |
-docker exec -it faster-whisper-server huggingface-cli download rhasspy/piper-voices --include 'en/en_US/amy/medium/*' 'voices.json' |
|
27 |
+docker exec -it speaches huggingface-cli download rhasspy/piper-voices --include 'en/en_US/amy/medium/*' 'voices.json' |
|
29 | 28 |
``` |
30 | 29 |
|
31 | 30 |
!!! note |
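The `--include` globs above control which files `huggingface-cli` fetches from the `rhasspy/piper-voices` repo. A rough sketch of the matching using Python's `fnmatch`, which approximates (but may not exactly reproduce) huggingface_hub's pattern semantics; the file list is a made-up sample:

```python
from fnmatch import fnmatch

# Hypothetical paths inside the rhasspy/piper-voices repo.
files = [
    "en/en_US/amy/medium/en_US-amy-medium.onnx",
    "en/en_US/amy/low/en_US-amy-low.onnx",
    "de/de_DE/thorsten/medium/de_DE-thorsten-medium.onnx",
    "voices.json",
]
# Mirrors: --include 'en/en_US/amy/medium/*' 'voices.json'
patterns = ["en/en_US/amy/medium/*", "voices.json"]
selected = [f for f in files if any(fnmatch(f, p) for p in patterns)]
```

Only the medium-quality `amy` voice and the `voices.json` index are selected, which is why that variant downloads in seconds rather than minutes.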
--- examples/javascript/index.js
+++ examples/javascript/index.js
... | ... | @@ -1,5 +1,5 @@ |
1 | 1 |
/** |
2 |
- * Example provided by https://github.com/Gan-Xing in https://github.com/fedirz/faster-whisper-server/issues/26 |
|
2 |
+ * Example provided by https://github.com/Gan-Xing in https://github.com/speaches-ai/speaches/issues/26 |
|
3 | 3 |
*/ |
4 | 4 |
import 'dotenv/config'; |
5 | 5 |
import fs from 'node:fs'; |
--- examples/live-audio/script.sh
+++ examples/live-audio/script.sh
... | ... | @@ -9,10 +9,10 @@ |
9 | 9 |
|
10 | 10 |
export WHISPER__MODEL=Systran/faster-distil-whisper-large-v3 # or Systran/faster-whisper-tiny.en if you are running on a CPU for faster inference. |
11 | 11 |
|
12 |
-# Ensure you have `faster-whisper-server` running. If this is your first time running it expect to wait up-to a minute for the model to be downloaded and loaded into memory. You can run `curl localhost:8000/health` to check if the server is ready or watch the logs with `docker logs -f <container_id>`. |
|
13 |
-docker run --detach --gpus=all --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cuda |
|
12 |
+# Ensure you have `speaches` running. If this is your first time running it, expect to wait up to a minute for the model to be downloaded and loaded into memory. You can run `curl localhost:8000/health` to check if the server is ready or watch the logs with `docker logs -f <container_id>`. |
|
13 |
+docker run --detach --gpus=all --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=$WHISPER__MODEL ghcr.io/speaches-ai/speaches:latest-cuda |
|
14 | 14 |
# or you can run it on a CPU |
15 |
-# docker run --detach --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cpu |
|
15 |
+# docker run --detach --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=$WHISPER__MODEL ghcr.io/speaches-ai/speaches:latest-cpu |
|
16 | 16 |
|
17 | 17 |
# `pv` is used to limit the rate at which the audio is streamed to the server. Audio is streamed at a rate of 32 kB/s (16000 samples per second * 16-bit samples / 8 bits per byte = 32000 bytes per second). This emulates live audio input from a microphone: `ffmpeg -loglevel quiet -f alsa -i default -ac 1 -ar 16000 -f s16le -` |
18 | 18 |
# shellcheck disable=SC2002 |
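The 32 kB/s rate limit passed to `pv` falls directly out of the PCM parameters given to `ffmpeg`; a quick arithmetic check:

```python
SAMPLE_RATE = 16_000   # Hz, matches the ffmpeg -ar flag
BITS_PER_SAMPLE = 16   # s16le PCM
CHANNELS = 1           # mono, matches -ac 1

# bytes per second of raw audio the server receives
bytes_per_second = SAMPLE_RATE * CHANNELS * BITS_PER_SAMPLE // 8
```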
--- examples/youtube/script.sh
+++ examples/youtube/script.sh
... | ... | @@ -5,10 +5,10 @@ |
5 | 5 |
# NOTE: do not use any distil-* model other than the large ones as they don't work on long audio files for some reason. |
6 | 6 |
export WHISPER__MODEL=Systran/faster-distil-whisper-large-v3 # or Systran/faster-whisper-tiny.en if you are running on a CPU for faster inference. |
7 | 7 |
|
8 |
-# Ensure you have `faster-whisper-server` running. If this is your first time running it expect to wait up-to a minute for the model to be downloaded and loaded into memory. You can run `curl localhost:8000/health` to check if the server is ready or watch the logs with `docker logs -f <container_id>`. |
|
9 |
-docker run --detach --gpus=all --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cuda |
|
8 |
+# Ensure you have `speaches` running. If this is your first time running it, expect to wait up to a minute for the model to be downloaded and loaded into memory. You can run `curl localhost:8000/health` to check if the server is ready or watch the logs with `docker logs -f <container_id>`. |
|
9 |
+docker run --detach --gpus=all --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=$WHISPER__MODEL ghcr.io/speaches-ai/speaches:latest-cuda |
|
10 | 10 |
# or you can run it on a CPU |
11 |
-# docker run --detach --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cpu |
|
11 |
+# docker run --detach --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=$WHISPER__MODEL ghcr.io/speaches-ai/speaches:latest-cpu |
|
12 | 12 |
|
13 | 13 |
# Download the audio from a YouTube video. In this example I'm downloading "The Evolution of the Operating System" from the Asianometry YouTube channel. I highly recommend checking this channel out; the guy produces very high-quality content. If you don't have `youtube-dl`, you'll have to install it. https://github.com/ytdl-org/youtube-dl |
14 | 14 |
youtube-dl --extract-audio --audio-format mp3 -o the-evolution-of-the-operating-system.mp3 'https://www.youtube.com/watch?v=1lG7lFLXBIs' |
--- mkdocs.yml
+++ mkdocs.yml
... | ... | @@ -1,8 +1,8 @@ |
1 | 1 |
# yaml-language-server: $schema=https://squidfunk.github.io/mkdocs-material/schema.json |
2 | 2 |
# https://www.mkdocs.org/user-guide/configuration/#configuration |
3 |
-site_name: Faster Whisper Server Documentation |
|
4 |
-site_url: https://fedirz.github.io/faster-whisper-server/ |
|
5 |
-repo_url: https://github.com/fedirz/faster-whisper-server/ |
|
3 |
+site_name: Speaches Documentation |
|
4 |
+site_url: https://speaches-ai.github.io/speaches/ |
|
5 |
+repo_url: https://github.com/speaches-ai/speaches/ |
|
6 | 6 |
edit_uri: edit/master/docs/ |
7 | 7 |
docs_dir: docs |
8 | 8 |
theme: |
--- pyproject.toml
+++ pyproject.toml
... | ... | @@ -1,5 +1,5 @@ |
1 | 1 |
[project] |
2 |
-name = "faster-whisper-server" |
|
2 |
+name = "speaches" |
|
3 | 3 |
version = "0.1.0" |
4 | 4 |
requires-python = ">=3.12,<3.13" |
5 | 5 |
# https://packaging.python.org/en/latest/specifications/version-specifiers/#id5 |
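Because the Python package is renamed along with the project, downstream code importing `faster_whisper_server` must switch to `speaches`, exactly as the source diffs below do. A hedged sketch of a one-line migration helper; it is not part of the project, and the regex-based rename is illustrative:

```python
import re

OLD, NEW = "faster_whisper_server", "speaches"

def migrate_import(line: str) -> str:
    """Rewrite old package imports to the new `speaches` name."""
    # \b keeps us from touching identifiers that merely contain the old name.
    return re.sub(rf"\b{OLD}\b", NEW, line)

migrated = migrate_import("from faster_whisper_server.config import Config")
```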
--- src/faster_whisper_server/__init__.py
+++ src/speaches/__init__.py
No changes |
--- src/faster_whisper_server/api_models.py
+++ src/speaches/api_models.py
... | ... | @@ -4,7 +4,7 @@ |
4 | 4 |
|
5 | 5 |
from pydantic import BaseModel, ConfigDict, Field |
6 | 6 |
|
7 |
-from faster_whisper_server.text_utils import Transcription, canonicalize_word, segments_to_text |
|
7 |
+from speaches.text_utils import Transcription, canonicalize_word, segments_to_text |
|
8 | 8 |
|
9 | 9 |
if TYPE_CHECKING: |
10 | 10 |
from collections.abc import Iterable |
... | ... | @@ -23,7 +23,7 @@ |
23 | 23 |
def from_segments(cls, segments: Iterable[TranscriptionSegment]) -> list[TranscriptionWord]: |
24 | 24 |
words: list[TranscriptionWord] = [] |
25 | 25 |
for segment in segments: |
26 |
- # NOTE: a temporary "fix" for https://github.com/fedirz/faster-whisper-server/issues/58. |
|
26 |
+ # NOTE: a temporary "fix" for https://github.com/speaches-ai/speaches/issues/58. |
|
27 | 27 |
# TODO: properly address the issue |
28 | 28 |
assert ( |
29 | 29 |
segment.words is not None |
--- src/faster_whisper_server/asr.py
+++ src/speaches/asr.py
... | ... | @@ -5,13 +5,13 @@ |
5 | 5 |
import time |
6 | 6 |
from typing import TYPE_CHECKING |
7 | 7 |
|
8 |
-from faster_whisper_server.api_models import TranscriptionSegment, TranscriptionWord |
|
9 |
-from faster_whisper_server.text_utils import Transcription |
|
8 |
+from speaches.api_models import TranscriptionSegment, TranscriptionWord |
|
9 |
+from speaches.text_utils import Transcription |
|
10 | 10 |
|
11 | 11 |
if TYPE_CHECKING: |
12 | 12 |
from faster_whisper import transcribe |
13 | 13 |
|
14 |
- from faster_whisper_server.audio import Audio |
|
14 |
+ from speaches.audio import Audio |
|
15 | 15 |
|
16 | 16 |
logger = logging.getLogger(__name__) |
17 | 17 |
|
--- src/faster_whisper_server/audio.py
+++ src/speaches/audio.py
... | ... | @@ -7,7 +7,7 @@ |
7 | 7 |
import numpy as np |
8 | 8 |
import soundfile as sf |
9 | 9 |
|
10 |
-from faster_whisper_server.config import SAMPLES_PER_SECOND |
|
10 |
+from speaches.config import SAMPLES_PER_SECOND |
|
11 | 11 |
|
12 | 12 |
if TYPE_CHECKING: |
13 | 13 |
from collections.abc import AsyncGenerator |
--- src/faster_whisper_server/config.py
+++ src/speaches/config.py
No changes |
--- src/faster_whisper_server/dependencies.py
+++ src/speaches/dependencies.py
... | ... | @@ -9,8 +9,8 @@ |
9 | 9 |
from openai.resources.audio import AsyncSpeech, AsyncTranscriptions |
10 | 10 |
from openai.resources.chat.completions import AsyncCompletions |
11 | 11 |
|
12 |
-from faster_whisper_server.config import Config |
|
13 |
-from faster_whisper_server.model_manager import PiperModelManager, WhisperModelManager |
|
12 |
+from speaches.config import Config |
|
13 |
+from speaches.model_manager import PiperModelManager, WhisperModelManager |
|
14 | 14 |
|
15 | 15 |
logger = logging.getLogger(__name__) |
16 | 16 |
|
... | ... | @@ -73,7 +73,7 @@ |
73 | 73 |
config = get_config() |
74 | 74 |
if config.speech_base_url is None: |
75 | 75 |
# this might not work as expected if `speech_router` won't have shared state (access to the same `model_manager`) with the main FastAPI `app`. TODO: verify # noqa: E501 |
76 |
- from faster_whisper_server.routers.speech import ( |
|
76 |
+ from speaches.routers.speech import ( |
|
77 | 77 |
router as speech_router, |
78 | 78 |
) |
79 | 79 |
|
... | ... | @@ -94,7 +94,7 @@ |
94 | 94 |
config = get_config() |
95 | 95 |
if config.transcription_base_url is None: |
96 | 96 |
# this might not work as expected if `transcription_router` won't have shared state (access to the same `model_manager`) with the main FastAPI `app`. TODO: verify # noqa: E501 |
97 |
- from faster_whisper_server.routers.stt import ( |
|
97 |
+ from speaches.routers.stt import ( |
|
98 | 98 |
router as stt_router, |
99 | 99 |
) |
100 | 100 |
|
--- src/faster_whisper_server/gradio_app.py
+++ src/speaches/gradio_app.py
... | ... | @@ -7,8 +7,8 @@ |
7 | 7 |
from httpx_sse import aconnect_sse |
8 | 8 |
from openai import AsyncOpenAI |
9 | 9 |
|
10 |
-from faster_whisper_server.config import Config, Task |
|
11 |
-from faster_whisper_server.hf_utils import PiperModel |
|
10 |
+from speaches.config import Config, Task |
|
11 |
+from speaches.hf_utils import PiperModel |
|
12 | 12 |
|
13 | 13 |
TRANSCRIPTION_ENDPOINT = "/v1/audio/transcriptions" |
14 | 14 |
TRANSLATION_ENDPOINT = "/v1/audio/translations" |
... | ... | @@ -128,9 +128,9 @@ |
128 | 128 |
file.write(audio_bytes) |
129 | 129 |
return file_path |
130 | 130 |
|
131 |
- with gr.Blocks(title="faster-whisper-server Playground") as demo: |
|
131 |
+ with gr.Blocks(title="Speaches Playground") as demo: |
|
132 | 132 |
gr.Markdown( |
133 |
- "### Consider supporting the project by starring the [repository on GitHub](https://github.com/fedirz/faster-whisper-server)." |
|
133 |
+ "### Consider supporting the project by starring the [repository on GitHub](https://github.com/speaches-ai/speaches)." |
|
134 | 134 |
) |
135 | 135 |
with gr.Tab(label="Transcribe/Translate"): |
136 | 136 |
audio = gr.Audio(type="filepath") |
... | ... | @@ -157,7 +157,7 @@ |
157 | 157 |
|
158 | 158 |
with gr.Tab(label="Speech Generation"): |
159 | 159 |
if platform.machine() != "x86_64": |
160 |
- from faster_whisper_server.routers.speech import ( |
|
160 |
+ from speaches.routers.speech import ( |
|
161 | 161 |
DEFAULT_VOICE, |
162 | 162 |
MAX_SAMPLE_RATE, |
163 | 163 |
MIN_SAMPLE_RATE, |
--- src/faster_whisper_server/hf_utils.py
+++ src/speaches/hf_utils.py
... | ... | @@ -10,7 +10,7 @@ |
10 | 10 |
from huggingface_hub.constants import HF_HUB_CACHE |
11 | 11 |
from pydantic import BaseModel, Field, computed_field |
12 | 12 |
|
13 |
-from faster_whisper_server.api_models import Model |
|
13 |
+from speaches.api_models import Model |
|
14 | 14 |
|
15 | 15 |
logger = logging.getLogger(__name__) |
16 | 16 |
|
--- src/faster_whisper_server/logger.py
+++ src/speaches/logger.py
No changes |
--- src/faster_whisper_server/main.py
+++ src/speaches/main.py
... | ... | @@ -10,15 +10,15 @@ |
10 | 10 |
) |
11 | 11 |
from fastapi.middleware.cors import CORSMiddleware |
12 | 12 |
|
13 |
-from faster_whisper_server.dependencies import ApiKeyDependency, get_config, get_model_manager |
|
14 |
-from faster_whisper_server.logger import setup_logger |
|
15 |
-from faster_whisper_server.routers.misc import ( |
|
13 |
+from speaches.dependencies import ApiKeyDependency, get_config, get_model_manager |
|
14 |
+from speaches.logger import setup_logger |
|
15 |
+from speaches.routers.misc import ( |
|
16 | 16 |
router as misc_router, |
17 | 17 |
) |
18 |
-from faster_whisper_server.routers.models import ( |
|
18 |
+from speaches.routers.models import ( |
|
19 | 19 |
router as models_router, |
20 | 20 |
) |
21 |
-from faster_whisper_server.routers.stt import ( |
|
21 |
+from speaches.routers.stt import ( |
|
22 | 22 |
router as stt_router, |
23 | 23 |
) |
24 | 24 |
|
... | ... | @@ -47,7 +47,7 @@ |
47 | 47 |
logger.debug(f"Config: {config}") |
48 | 48 |
|
49 | 49 |
if platform.machine() == "x86_64": |
50 |
- from faster_whisper_server.routers.speech import ( |
|
50 |
+ from speaches.routers.speech import ( |
|
51 | 51 |
router as speech_router, |
52 | 52 |
) |
53 | 53 |
else: |
... | ... | @@ -86,7 +86,7 @@ |
86 | 86 |
if config.enable_ui: |
87 | 87 |
import gradio as gr |
88 | 88 |
|
89 |
- from faster_whisper_server.gradio_app import create_gradio_demo |
|
89 |
+ from speaches.gradio_app import create_gradio_demo |
|
90 | 90 |
|
91 | 91 |
app = gr.mount_gradio_app(app, create_gradio_demo(config), path="/") |
92 | 92 |
|
--- src/faster_whisper_server/model_manager.py
+++ src/speaches/model_manager.py
... | ... | @@ -9,14 +9,14 @@ |
9 | 9 |
|
10 | 10 |
from faster_whisper import WhisperModel |
11 | 11 |
|
12 |
-from faster_whisper_server.hf_utils import get_piper_voice_model_file |
|
12 |
+from speaches.hf_utils import get_piper_voice_model_file |
|
13 | 13 |
|
14 | 14 |
if TYPE_CHECKING: |
15 | 15 |
from collections.abc import Callable |
16 | 16 |
|
17 | 17 |
from piper.voice import PiperVoice |
18 | 18 |
|
19 |
- from faster_whisper_server.config import ( |
|
19 |
+ from speaches.config import ( |
|
20 | 20 |
WhisperConfig, |
21 | 21 |
) |
22 | 22 |
|
--- src/faster_whisper_server/routers/__init__.py
+++ src/speaches/routers/__init__.py
No changes |
--- src/faster_whisper_server/routers/misc.py
+++ src/speaches/routers/misc.py
... | ... | @@ -7,8 +7,8 @@ |
7 | 7 |
import huggingface_hub |
8 | 8 |
from huggingface_hub.hf_api import RepositoryNotFoundError |
9 | 9 |
|
10 |
-from faster_whisper_server import hf_utils |
|
11 |
-from faster_whisper_server.dependencies import ModelManagerDependency # noqa: TCH001 |
|
10 |
+from speaches import hf_utils |
|
11 |
+from speaches.dependencies import ModelManagerDependency # noqa: TC001 |
|
12 | 12 |
|
13 | 13 |
router = APIRouter() |
14 | 14 |
|
--- src/faster_whisper_server/routers/models.py
+++ src/speaches/routers/models.py
... | ... | @@ -9,11 +9,11 @@ |
9 | 9 |
) |
10 | 10 |
import huggingface_hub |
11 | 11 |
|
12 |
-from faster_whisper_server.api_models import ( |
|
12 |
+from speaches.api_models import ( |
|
13 | 13 |
ListModelsResponse, |
14 | 14 |
Model, |
15 | 15 |
) |
16 |
-from faster_whisper_server.hf_utils import list_whisper_models |
|
16 |
+from speaches.hf_utils import list_whisper_models |
|
17 | 17 |
|
18 | 18 |
if TYPE_CHECKING: |
19 | 19 |
from huggingface_hub.hf_api import ModelInfo |
--- src/faster_whisper_server/routers/speech.py
+++ src/speaches/routers/speech.py
... | ... | @@ -11,8 +11,8 @@ |
11 | 11 |
from pydantic import BaseModel, BeforeValidator, Field, ValidationError, model_validator |
12 | 12 |
import soundfile as sf |
13 | 13 |
|
14 |
-from faster_whisper_server.dependencies import PiperModelManagerDependency |
|
15 |
-from faster_whisper_server.hf_utils import ( |
|
14 |
+from speaches.dependencies import PiperModelManagerDependency |
|
15 |
+from speaches.hf_utils import ( |
|
16 | 16 |
PiperModel, |
17 | 17 |
list_piper_models, |
18 | 18 |
read_piper_voices_config, |
--- src/faster_whisper_server/routers/stt.py
+++ src/speaches/routers/stt.py
... | ... | @@ -27,7 +27,7 @@ |
27 | 27 |
from numpy.typing import NDArray |
28 | 28 |
from pydantic import AfterValidator, Field |
29 | 29 |
|
30 |
-from faster_whisper_server.api_models import ( |
|
30 |
+from speaches.api_models import ( |
|
31 | 31 |
DEFAULT_TIMESTAMP_GRANULARITIES, |
32 | 32 |
TIMESTAMP_GRANULARITIES_COMBINATIONS, |
33 | 33 |
CreateTranscriptionResponseJson, |
... | ... | @@ -35,17 +35,17 @@ |
35 | 35 |
TimestampGranularities, |
36 | 36 |
TranscriptionSegment, |
37 | 37 |
) |
38 |
-from faster_whisper_server.asr import FasterWhisperASR |
|
39 |
-from faster_whisper_server.audio import AudioStream, audio_samples_from_file |
|
40 |
-from faster_whisper_server.config import ( |
|
38 |
+from speaches.asr import FasterWhisperASR |
|
39 |
+from speaches.audio import AudioStream, audio_samples_from_file |
|
40 |
+from speaches.config import ( |
|
41 | 41 |
SAMPLES_PER_SECOND, |
42 | 42 |
Language, |
43 | 43 |
ResponseFormat, |
44 | 44 |
Task, |
45 | 45 |
) |
46 |
-from faster_whisper_server.dependencies import ConfigDependency, ModelManagerDependency, get_config |
|
47 |
-from faster_whisper_server.text_utils import segments_to_srt, segments_to_text, segments_to_vtt |
|
48 |
-from faster_whisper_server.transcriber import audio_transcriber |
|
46 |
+from speaches.dependencies import ConfigDependency, ModelManagerDependency, get_config |
|
47 |
+from speaches.text_utils import segments_to_srt, segments_to_text, segments_to_vtt |
|
48 |
+from speaches.transcriber import audio_transcriber |
|
49 | 49 |
|
50 | 50 |
if TYPE_CHECKING: |
51 | 51 |
from collections.abc import Generator, Iterable |
... | ... | @@ -77,7 +77,7 @@ |
77 | 77 |
) from e |
78 | 78 |
except Exception as e: |
79 | 79 |
logger.exception( |
80 |
- "Failed to decode audio. This is likely a bug. Please create an issue at https://github.com/fedirz/faster-whisper-server/issues/new." |
|
80 |
+ "Failed to decode audio. This is likely a bug. Please create an issue at https://github.com/speaches-ai/speaches/issues/new." |
|
81 | 81 |
) |
82 | 82 |
raise HTTPException(status_code=500, detail="Failed to decode audio.") from e |
83 | 83 |
else: |
--- src/faster_whisper_server/text_utils.py
+++ src/speaches/text_utils.py
... | ... | @@ -6,7 +6,7 @@ |
6 | 6 |
if TYPE_CHECKING: |
7 | 7 |
from collections.abc import Iterable |
8 | 8 |
|
9 |
- from faster_whisper_server.api_models import TranscriptionSegment, TranscriptionWord |
|
9 |
+ from speaches.api_models import TranscriptionSegment, TranscriptionWord |
|
10 | 10 |
|
11 | 11 |
|
12 | 12 |
class Transcription: |
... | ... | @@ -38,7 +38,7 @@ |
38 | 38 |
self.words.extend(words) |
39 | 39 |
|
40 | 40 |
def _ensure_no_word_overlap(self, words: list[TranscriptionWord]) -> None: |
41 |
- from faster_whisper_server.dependencies import get_config # HACK: avoid circular import |
|
41 |
+ from speaches.dependencies import get_config # HACK: avoid circular import |
|
42 | 42 |
|
43 | 43 |
config = get_config() # HACK |
44 | 44 |
if len(self.words) > 0 and len(words) > 0: |
--- src/faster_whisper_server/text_utils_test.py
+++ src/speaches/text_utils_test.py
... | ... | @@ -1,5 +1,5 @@ |
1 |
-from faster_whisper_server.api_models import TranscriptionWord |
|
2 |
-from faster_whisper_server.text_utils import ( |
|
1 |
+from speaches.api_models import TranscriptionWord |
|
2 |
+from speaches.text_utils import ( |
|
3 | 3 |
canonicalize_word, |
4 | 4 |
common_prefix, |
5 | 5 |
is_eos, |
--- src/faster_whisper_server/transcriber.py
+++ src/speaches/transcriber.py
... | ... | @@ -3,14 +3,14 @@ |
3 | 3 |
import logging |
4 | 4 |
from typing import TYPE_CHECKING |
5 | 5 |
|
6 |
-from faster_whisper_server.audio import Audio, AudioStream |
|
7 |
-from faster_whisper_server.text_utils import Transcription, common_prefix, to_full_sentences, word_to_text |
|
6 |
+from speaches.audio import Audio, AudioStream |
|
7 |
+from speaches.text_utils import Transcription, common_prefix, to_full_sentences, word_to_text |
|
8 | 8 |
|
9 | 9 |
if TYPE_CHECKING: |
10 | 10 |
from collections.abc import AsyncGenerator |
11 | 11 |
|
12 |
- from faster_whisper_server.api_models import TranscriptionWord |
|
13 |
- from faster_whisper_server.asr import FasterWhisperASR |
|
12 |
+ from speaches.api_models import TranscriptionWord |
|
13 |
+ from speaches.asr import FasterWhisperASR |
|
14 | 14 |
|
15 | 15 |
logger = logging.getLogger(__name__) |
16 | 16 |
|
--- tests/api_timestamp_granularities_test.py
+++ tests/api_timestamp_granularities_test.py
... | ... | @@ -5,7 +5,7 @@ |
5 | 5 |
from openai import AsyncOpenAI |
6 | 6 |
import pytest |
7 | 7 |
|
8 |
-from faster_whisper_server.api_models import TIMESTAMP_GRANULARITIES_COMBINATIONS, TimestampGranularities |
|
8 |
+from speaches.api_models import TIMESTAMP_GRANULARITIES_COMBINATIONS, TimestampGranularities |
|
9 | 9 |
|
10 | 10 |
|
11 | 11 |
@pytest.mark.asyncio |
--- tests/conftest.py
+++ tests/conftest.py
... | ... | @@ -12,9 +12,9 @@ |
12 | 12 |
import pytest_asyncio |
13 | 13 |
from pytest_mock import MockerFixture |
14 | 14 |
|
15 |
-from faster_whisper_server.config import Config, WhisperConfig |
|
16 |
-from faster_whisper_server.dependencies import get_config |
|
17 |
-from faster_whisper_server.main import create_app |
|
15 |
+from speaches.config import Config, WhisperConfig |
|
16 |
+from speaches.dependencies import get_config |
|
17 |
+from speaches.main import create_app |
|
18 | 18 |
|
19 | 19 |
DISABLE_LOGGERS = ["multipart.multipart", "faster_whisper"] |
20 | 20 |
OPENAI_BASE_URL = "https://api.openai.com/v1" |
... | ... | @@ -54,11 +54,11 @@ |
54 | 54 |
@asynccontextmanager |
55 | 55 |
async def inner(config: Config = DEFAULT_CONFIG) -> AsyncGenerator[AsyncClient, None]: |
56 | 56 |
# NOTE: all calls to `get_config` should be patched. One way to test that this works is to update the original `get_config` to raise an exception and see if the tests fail # noqa: E501 |
57 |
- mocker.patch("faster_whisper_server.dependencies.get_config", return_value=config) |
|
58 |
- mocker.patch("faster_whisper_server.main.get_config", return_value=config) |
|
57 |
+ mocker.patch("speaches.dependencies.get_config", return_value=config) |
|
58 |
+ mocker.patch("speaches.main.get_config", return_value=config) |
|
59 | 59 |
# NOTE: I couldn't get the following to work but it shouldn't matter |
60 | 60 |
# mocker.patch( |
61 |
- # "faster_whisper_server.text_utils.Transcription._ensure_no_word_overlap.get_config", return_value=config |
|
61 |
+ # "speaches.text_utils.Transcription._ensure_no_word_overlap.get_config", return_value=config |
|
62 | 62 |
# ) |
63 | 63 |
|
64 | 64 |
app = create_app() |
--- tests/model_manager_test.py
+++ tests/model_manager_test.py
... | ... | @@ -3,7 +3,7 @@ |
3 | 3 |
import anyio |
4 | 4 |
import pytest |
5 | 5 |
|
6 |
-from faster_whisper_server.config import Config, WhisperConfig |
|
6 |
+from speaches.config import Config, WhisperConfig |
|
7 | 7 |
from tests.conftest import DEFAULT_WHISPER_MODEL, AclientFactory |
8 | 8 |
|
9 | 9 |
MODEL = DEFAULT_WHISPER_MODEL # just to make the test more readable |
--- tests/openai_timestamp_granularities_test.py
+++ tests/openai_timestamp_granularities_test.py
... | ... | @@ -5,7 +5,7 @@ |
5 | 5 |
from openai import AsyncOpenAI, BadRequestError |
6 | 6 |
import pytest |
7 | 7 |
|
8 |
-from faster_whisper_server.api_models import TIMESTAMP_GRANULARITIES_COMBINATIONS, TimestampGranularities |
|
8 |
+from speaches.api_models import TIMESTAMP_GRANULARITIES_COMBINATIONS, TimestampGranularities |
|
9 | 9 |
|
10 | 10 |
|
11 | 11 |
@pytest.mark.asyncio |
--- tests/speech_test.py
+++ tests/speech_test.py
... | ... | @@ -9,7 +9,7 @@ |
9 | 9 |
if platform_machine != "x86_64": |
10 | 10 |
pytest.skip("Only supported on x86_64", allow_module_level=True) |
11 | 11 |
|
12 |
-from faster_whisper_server.routers.speech import ( # noqa: E402 |
|
12 |
+from speaches.routers.speech import ( # noqa: E402 |
|
13 | 13 |
DEFAULT_MODEL, |
14 | 14 |
DEFAULT_RESPONSE_FORMAT, |
15 | 15 |
DEFAULT_VOICE, |
--- tests/sse_test.py
+++ tests/sse_test.py
... | ... | @@ -9,7 +9,7 @@ |
9 | 9 |
import webvtt |
10 | 10 |
import webvtt.vtt |
11 | 11 |
|
12 |
-from faster_whisper_server.api_models import ( |
|
12 |
+from speaches.api_models import ( |
|
13 | 13 |
CreateTranscriptionResponseJson, |
14 | 14 |
CreateTranscriptionResponseVerboseJson, |
15 | 15 |
) |
--- uv.lock
+++ uv.lock
... | ... | @@ -267,115 +267,6 @@ |
267 | 267 |
] |
268 | 268 |
|
269 | 269 |
[[package]] |
270 |
-name = "faster-whisper-server" |
|
271 |
-version = "0.1.0" |
|
272 |
-source = { editable = "." } |
|
273 |
-dependencies = [ |
|
274 |
- { name = "ctranslate2" }, |
|
275 |
- { name = "fastapi" }, |
|
276 |
- { name = "faster-whisper" }, |
|
277 |
- { name = "huggingface-hub", extra = ["hf-transfer"] }, |
|
278 |
- { name = "numpy" }, |
|
279 |
- { name = "piper-phonemize", marker = "platform_machine == 'x86_64'" }, |
|
280 |
- { name = "piper-tts", marker = "platform_machine == 'x86_64'" }, |
|
281 |
- { name = "pydantic" }, |
|
282 |
- { name = "pydantic-settings" }, |
|
283 |
- { name = "python-multipart" }, |
|
284 |
- { name = "sounddevice" }, |
|
285 |
- { name = "soundfile" }, |
|
286 |
- { name = "uvicorn" }, |
|
287 |
-] |
|
288 |
- |
|
289 |
-[package.optional-dependencies] |
|
290 |
-client = [ |
|
291 |
- { name = "keyboard" }, |
|
292 |
-] |
|
293 |
-dev = [ |
|
294 |
- { name = "anyio" }, |
|
295 |
- { name = "basedpyright" }, |
|
296 |
- { name = "mdx-truly-sane-lists" }, |
|
297 |
- { name = "mkdocs-material" }, |
|
298 |
- { name = "mkdocs-render-swagger-plugin" }, |
|
299 |
- { name = "mkdocstrings", extra = ["python"] }, |
|
300 |
- { name = "pre-commit" }, |
|
301 |
- { name = "pytest" }, |
|
302 |
- { name = "pytest-antilru" }, |
|
303 |
- { name = "pytest-asyncio" }, |
|
304 |
- { name = "pytest-mock" }, |
|
305 |
- { name = "pytest-xdist" }, |
|
306 |
- { name = "ruff" }, |
|
307 |
- { name = "srt" }, |
|
308 |
- { name = "webvtt-py" }, |
|
309 |
-] |
|
310 |
-opentelemetry = [ |
|
311 |
- { name = "opentelemetry-distro" }, |
|
312 |
- { name = "opentelemetry-exporter-otlp" }, |
|
313 |
- { name = "opentelemetry-instrumentation-asyncio" }, |
|
314 |
- { name = "opentelemetry-instrumentation-fastapi" }, |
|
315 |
- { name = "opentelemetry-instrumentation-grpc" }, |
|
316 |
- { name = "opentelemetry-instrumentation-httpx" }, |
|
317 |
- { name = "opentelemetry-instrumentation-logging" }, |
|
318 |
- { name = "opentelemetry-instrumentation-requests" }, |
|
319 |
- { name = "opentelemetry-instrumentation-threading" }, |
|
320 |
- { name = "opentelemetry-instrumentation-urllib" }, |
|
321 |
- { name = "opentelemetry-instrumentation-urllib3" }, |
|
322 |
-] |
|
323 |
-ui = [ |
|
324 |
- { name = "gradio" }, |
|
325 |
- { name = "httpx" }, |
|
326 |
- { name = "httpx-sse" }, |
|
327 |
- { name = "openai" }, |
|
328 |
-] |
|
329 |
- |
|
330 |
-[package.metadata] |
|
331 |
-requires-dist = [ |
|
332 |
- { name = "anyio", marker = "extra == 'dev'", specifier = ">=4.4.0" }, |
|
333 |
- { name = "basedpyright", marker = "extra == 'dev'", specifier = ">=1.18.0" }, |
|
334 |
- { name = "ctranslate2", specifier = ">=4.5.0" }, |
|
335 |
- { name = "fastapi", specifier = ">=0.115.0" }, |
|
336 |
- { name = "faster-whisper", specifier = ">=1.1.0" }, |
|
337 |
- { name = "gradio", marker = "extra == 'ui'", specifier = ">=5.0.2" }, |
|
338 |
- { name = "httpx", marker = "extra == 'ui'", specifier = ">=0.27.2" }, |
|
339 |
- { name = "httpx-sse", marker = "extra == 'ui'", specifier = ">=0.4.0" }, |
|
340 |
- { name = "huggingface-hub", extras = ["hf-transfer"], specifier = ">=0.25.1" }, |
|
341 |
- { name = "keyboard", marker = "extra == 'client'", specifier = ">=0.13.5" }, |
|
342 |
- { name = "mdx-truly-sane-lists", marker = "extra == 'dev'", specifier = ">=1.3" }, |
|
343 |
- { name = "mkdocs-material", marker = "extra == 'dev'", specifier = ">=9.5.39" }, |
|
344 |
- { name = "mkdocs-render-swagger-plugin", marker = "extra == 'dev'", specifier = ">=0.1.2" }, |
|
345 |
- { name = "mkdocstrings", extras = ["python"], marker = "extra == 'dev'", specifier = ">=0.26.1" }, |
|
346 |
- { name = "numpy", specifier = ">=2.1.1" }, |
|
347 |
- { name = "openai", marker = "extra == 'ui'", specifier = ">=1.48.0" }, |
|
348 |
- { name = "opentelemetry-distro", marker = "extra == 'opentelemetry'", specifier = ">=0.48b0" }, |
|
349 |
- { name = "opentelemetry-exporter-otlp", marker = "extra == 'opentelemetry'", specifier = ">=1.27.0" }, |
|
350 |
- { name = "opentelemetry-instrumentation-asyncio", marker = "extra == 'opentelemetry'", specifier = "==0.48b0" }, |
|
351 |
- { name = "opentelemetry-instrumentation-fastapi", marker = "extra == 'opentelemetry'", specifier = "==0.48b0" }, |
|
352 |
- { name = "opentelemetry-instrumentation-grpc", marker = "extra == 'opentelemetry'", specifier = "==0.48b0" }, |
|
353 |
- { name = "opentelemetry-instrumentation-httpx", marker = "extra == 'opentelemetry'", specifier = "==0.48b0" }, |
|
354 |
- { name = "opentelemetry-instrumentation-logging", marker = "extra == 'opentelemetry'", specifier = "==0.48b0" }, |
|
355 |
- { name = "opentelemetry-instrumentation-requests", marker = "extra == 'opentelemetry'", specifier = "==0.48b0" }, |
|
356 |
- { name = "opentelemetry-instrumentation-threading", marker = "extra == 'opentelemetry'", specifier = "==0.48b0" }, |
|
357 |
- { name = "opentelemetry-instrumentation-urllib", marker = "extra == 'opentelemetry'", specifier = "==0.48b0" }, |
|
358 |
- { name = "opentelemetry-instrumentation-urllib3", marker = "extra == 'opentelemetry'", specifier = "==0.48b0" }, |
|
359 |
- { name = "piper-phonemize", marker = "platform_machine == 'x86_64'", url = "https://github.com/fedirz/piper-phonemize/raw/refs/heads/master/dist/piper_phonemize-1.2.0-cp312-cp312-manylinux_2_28_x86_64.whl" }, |
|
360 |
- { name = "piper-tts", marker = "platform_machine == 'x86_64'", specifier = ">=1.2.0" }, |
|
361 |
- { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=4.0.1" }, |
|
362 |
- { name = "pydantic", specifier = ">=2.9.0" }, |
|
363 |
- { name = "pydantic-settings", specifier = ">=2.5.2" }, |
|
364 |
- { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.3.3" }, |
|
365 |
- { name = "pytest-antilru", marker = "extra == 'dev'", specifier = ">=2.0.0" }, |
|
366 |
- { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24.0" }, |
|
367 |
- { name = "pytest-mock", marker = "extra == 'dev'", specifier = ">=3.14.0" }, |
|
368 |
- { name = "pytest-xdist", marker = "extra == 'dev'", specifier = ">=3.6.1" }, |
|
369 |
- { name = "python-multipart", specifier = ">=0.0.10" }, |
|
370 |
- { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.7.1" }, |
|
371 |
- { name = "sounddevice", specifier = ">=0.5.1" }, |
|
372 |
- { name = "soundfile", specifier = ">=0.12.1" }, |
|
373 |
- { name = "srt", marker = "extra == 'dev'", specifier = ">=3.5.3" }, |
|
374 |
- { name = "uvicorn", specifier = ">=0.30.6" }, |
|
375 |
- { name = "webvtt-py", marker = "extra == 'dev'", specifier = ">=0.5.1" }, |
|
376 |
-] |
|
377 |
- |
|
378 |
-[[package]] |
|
379 | 270 |
name = "ffmpy" |
380 | 271 |
version = "0.4.0" |
381 | 272 |
source = { registry = "https://pypi.org/simple" } |
... | ... | @@ -4242,6 +4133,115 @@ |
4242 | 4133 |
] |
4243 | 4134 |
|
4244 | 4135 |
[[package]] |
4136 |
+name = "speaches" |
|
4137 |
+version = "0.1.0" |
|
4138 |
+source = { editable = "." } |
|
4139 |
+dependencies = [ |
|
4140 |
+ { name = "ctranslate2" }, |
|
4141 |
+ { name = "fastapi" }, |
|
4142 |
+ { name = "faster-whisper" }, |
|
4143 |
+ { name = "huggingface-hub", extra = ["hf-transfer"] }, |
|
4144 |
+ { name = "numpy" }, |
|
4145 |
+ { name = "piper-phonemize", marker = "platform_machine == 'x86_64'" }, |
|
4146 |
+ { name = "piper-tts", marker = "platform_machine == 'x86_64'" }, |
|
4147 |
+ { name = "pydantic" }, |
|
4148 |
+ { name = "pydantic-settings" }, |
|
4149 |
+ { name = "python-multipart" }, |
|
4150 |
+ { name = "sounddevice" }, |
|
4151 |
+ { name = "soundfile" }, |
|
4152 |
+ { name = "uvicorn" }, |
|
4153 |
+] |
|
4154 |
+ |
|
4155 |
+[package.optional-dependencies] |
|
4156 |
+client = [ |
|
4157 |
+ { name = "keyboard" }, |
|
4158 |
+] |
|
4159 |
+dev = [ |
|
4160 |
+ { name = "anyio" }, |
|
4161 |
+ { name = "basedpyright" }, |
|
4162 |
+ { name = "mdx-truly-sane-lists" }, |
|
4163 |
+ { name = "mkdocs-material" }, |
|
4164 |
+ { name = "mkdocs-render-swagger-plugin" }, |
|
4165 |
+ { name = "mkdocstrings", extra = ["python"] }, |
|
4166 |
+ { name = "pre-commit" }, |
|
4167 |
+ { name = "pytest" }, |
|
4168 |
+ { name = "pytest-antilru" }, |
|
4169 |
+ { name = "pytest-asyncio" }, |
|
4170 |
+ { name = "pytest-mock" }, |
|
4171 |
+ { name = "pytest-xdist" }, |
|
4172 |
+ { name = "ruff" }, |
|
4173 |
+ { name = "srt" }, |
|
4174 |
+ { name = "webvtt-py" }, |
|
4175 |
+] |
|
4176 |
+opentelemetry = [ |
|
4177 |
+ { name = "opentelemetry-distro" }, |
|
4178 |
+ { name = "opentelemetry-exporter-otlp" }, |
|
4179 |
+ { name = "opentelemetry-instrumentation-asyncio" }, |
|
4180 |
+ { name = "opentelemetry-instrumentation-fastapi" }, |
|
4181 |
+ { name = "opentelemetry-instrumentation-grpc" }, |
|
4182 |
+ { name = "opentelemetry-instrumentation-httpx" }, |
|
4183 |
+ { name = "opentelemetry-instrumentation-logging" }, |
|
4184 |
+ { name = "opentelemetry-instrumentation-requests" }, |
|
4185 |
+ { name = "opentelemetry-instrumentation-threading" }, |
|
4186 |
+ { name = "opentelemetry-instrumentation-urllib" }, |
|
4187 |
+ { name = "opentelemetry-instrumentation-urllib3" }, |
|
4188 |
+] |
|
4189 |
+ui = [ |
|
4190 |
+ { name = "gradio" }, |
|
4191 |
+ { name = "httpx" }, |
|
4192 |
+ { name = "httpx-sse" }, |
|
4193 |
+ { name = "openai" }, |
|
4194 |
+] |
|
4195 |
+ |
|
4196 |
+[package.metadata] |
|
4197 |
+requires-dist = [ |
|
4198 |
+ { name = "anyio", marker = "extra == 'dev'", specifier = ">=4.4.0" }, |
|
4199 |
+ { name = "basedpyright", marker = "extra == 'dev'", specifier = ">=1.18.0" }, |
|
4200 |
+ { name = "ctranslate2", specifier = ">=4.5.0" }, |
|
4201 |
+ { name = "fastapi", specifier = ">=0.115.0" }, |
|
4202 |
+ { name = "faster-whisper", specifier = ">=1.1.0" }, |
|
4203 |
+ { name = "gradio", marker = "extra == 'ui'", specifier = ">=5.0.2" }, |
|
4204 |
+ { name = "httpx", marker = "extra == 'ui'", specifier = ">=0.27.2" }, |
|
4205 |
+ { name = "httpx-sse", marker = "extra == 'ui'", specifier = ">=0.4.0" }, |
|
4206 |
+ { name = "huggingface-hub", extras = ["hf-transfer"], specifier = ">=0.25.1" }, |
|
4207 |
+ { name = "keyboard", marker = "extra == 'client'", specifier = ">=0.13.5" }, |
|
4208 |
+ { name = "mdx-truly-sane-lists", marker = "extra == 'dev'", specifier = ">=1.3" }, |
|
4209 |
+ { name = "mkdocs-material", marker = "extra == 'dev'", specifier = ">=9.5.39" }, |
|
4210 |
+ { name = "mkdocs-render-swagger-plugin", marker = "extra == 'dev'", specifier = ">=0.1.2" }, |
|
4211 |
+ { name = "mkdocstrings", extras = ["python"], marker = "extra == 'dev'", specifier = ">=0.26.1" }, |
|
4212 |
+ { name = "numpy", specifier = ">=2.1.1" }, |
|
4213 |
+ { name = "openai", marker = "extra == 'ui'", specifier = ">=1.48.0" }, |
|
4214 |
+ { name = "opentelemetry-distro", marker = "extra == 'opentelemetry'", specifier = ">=0.48b0" }, |
|
4215 |
+ { name = "opentelemetry-exporter-otlp", marker = "extra == 'opentelemetry'", specifier = ">=1.27.0" }, |
|
4216 |
+ { name = "opentelemetry-instrumentation-asyncio", marker = "extra == 'opentelemetry'", specifier = "==0.48b0" }, |
|
4217 |
+ { name = "opentelemetry-instrumentation-fastapi", marker = "extra == 'opentelemetry'", specifier = "==0.48b0" }, |
|
4218 |
+ { name = "opentelemetry-instrumentation-grpc", marker = "extra == 'opentelemetry'", specifier = "==0.48b0" }, |
|
4219 |
+ { name = "opentelemetry-instrumentation-httpx", marker = "extra == 'opentelemetry'", specifier = "==0.48b0" }, |
|
4220 |
+ { name = "opentelemetry-instrumentation-logging", marker = "extra == 'opentelemetry'", specifier = "==0.48b0" }, |
|
4221 |
+ { name = "opentelemetry-instrumentation-requests", marker = "extra == 'opentelemetry'", specifier = "==0.48b0" }, |
|
4222 |
+ { name = "opentelemetry-instrumentation-threading", marker = "extra == 'opentelemetry'", specifier = "==0.48b0" }, |
|
4223 |
+ { name = "opentelemetry-instrumentation-urllib", marker = "extra == 'opentelemetry'", specifier = "==0.48b0" }, |
|
4224 |
+ { name = "opentelemetry-instrumentation-urllib3", marker = "extra == 'opentelemetry'", specifier = "==0.48b0" }, |
|
4225 |
+ { name = "piper-phonemize", marker = "platform_machine == 'x86_64'", url = "https://github.com/fedirz/piper-phonemize/raw/refs/heads/master/dist/piper_phonemize-1.2.0-cp312-cp312-manylinux_2_28_x86_64.whl" }, |
|
4226 |
+ { name = "piper-tts", marker = "platform_machine == 'x86_64'", specifier = ">=1.2.0" }, |
|
4227 |
+ { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=4.0.1" }, |
|
4228 |
+ { name = "pydantic", specifier = ">=2.9.0" }, |
|
4229 |
+ { name = "pydantic-settings", specifier = ">=2.5.2" }, |
|
4230 |
+ { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.3.3" }, |
|
4231 |
+ { name = "pytest-antilru", marker = "extra == 'dev'", specifier = ">=2.0.0" }, |
|
4232 |
+ { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24.0" }, |
|
4233 |
+ { name = "pytest-mock", marker = "extra == 'dev'", specifier = ">=3.14.0" }, |
|
4234 |
+ { name = "pytest-xdist", marker = "extra == 'dev'", specifier = ">=3.6.1" }, |
|
4235 |
+ { name = "python-multipart", specifier = ">=0.0.10" }, |
|
4236 |
+ { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.7.1" }, |
|
4237 |
+ { name = "sounddevice", specifier = ">=0.5.1" }, |
|
4238 |
+ { name = "soundfile", specifier = ">=0.12.1" }, |
|
4239 |
+ { name = "srt", marker = "extra == 'dev'", specifier = ">=3.5.3" }, |
|
4240 |
+ { name = "uvicorn", specifier = ">=0.30.6" }, |
|
4241 |
+ { name = "webvtt-py", marker = "extra == 'dev'", specifier = ">=0.5.1" }, |
|
4242 |
+] |
|
4243 |
+ |
|
4244 |
+[[package]] |
|
4245 | 4245 |
name = "srt" |
4246 | 4246 |
version = "3.5.3" |
4247 | 4247 |
source = { registry = "https://pypi.org/simple" } |