Fedir Zadniprovskyi 2024-05-27
chore: rename to 'faster-whisper-server'
@39ee11644eedfd9dac27d3cf4d795597505e3622
.github/workflows/docker-build-and-push.yaml
--- .github/workflows/docker-build-and-push.yaml
+++ .github/workflows/docker-build-and-push.yaml
@@ -28,7 +28,7 @@
         uses: docker/metadata-action@v5
         with:
           images: |
-            fedirz/speaches
+            fedirz/faster-whisper-server
           # https://github.com/docker/metadata-action?tab=readme-ov-file#flavor-input
           flavor: |
             latest=false
@@ -47,5 +47,5 @@
           # platforms: linux/amd64,linux/arm64
           tags: ${{ steps.meta.outputs.tags }}
           # TODO: cache
-          # cache-from: type=registry,ref=fedirz/speaches:buildcache
-          # cache-to: type=registry,ref=fedirz/speaches:buildcache,mode=max
+          # cache-from: type=registry,ref=fedirz/faster-whisper-server:buildcache
+          # cache-to: type=registry,ref=fedirz/faster-whisper-server:buildcache,mode=max
Dockerfile.cpu
--- Dockerfile.cpu
+++ Dockerfile.cpu
@@ -9,12 +9,12 @@
     rm -rf /var/lib/apt/lists/* && \
     curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
 RUN pip install --no-cache-dir poetry==1.8.2
-WORKDIR /root/speaches
+WORKDIR /root/faster-whisper-server
 COPY pyproject.toml poetry.lock ./
 RUN poetry install --only main
-COPY ./speaches ./speaches
+COPY ./faster_whisper_server ./faster_whisper_server
 ENTRYPOINT ["poetry", "run"]
-CMD ["uvicorn", "speaches.main:app"]
+CMD ["uvicorn", "faster_whisper_server.main:app"]
 ENV WHISPER_MODEL=distil-medium.en
 ENV WHISPER_INFERENCE_DEVICE=cpu
 ENV WHISPER_COMPUTE_TYPE=int8
Dockerfile.cuda
--- Dockerfile.cuda
+++ Dockerfile.cuda
@@ -9,12 +9,12 @@
     rm -rf /var/lib/apt/lists/* && \
     curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
 RUN pip install --no-cache-dir poetry==1.8.2
-WORKDIR /root/speaches
+WORKDIR /root/faster-whisper-server
 COPY pyproject.toml poetry.lock ./
 RUN poetry install --only main
-COPY ./speaches ./speaches
+COPY ./faster_whisper_server ./faster_whisper_server
 ENTRYPOINT ["poetry", "run"]
-CMD ["uvicorn", "speaches.main:app"]
+CMD ["uvicorn", "faster_whisper_server.main:app"]
 ENV WHISPER_MODEL=distil-large-v3
 ENV WHISPER_INFERENCE_DEVICE=cuda
 ENV UVICORN_HOST=0.0.0.0
README.md
--- README.md
+++ README.md
@@ -1,20 +1,27 @@
-# WARN: WIP (code is ugly, bad documentation, may have bugs, test files aren't included, CPU inference was barely tested, etc.)
-# Intro
-:peach:`speaches` is a web server that supports real-time transcription using WebSockets.
+## Faster Whisper Server
+`faster-whisper-server` is a web server that supports real-time transcription using WebSockets.
 - [faster-whisper](https://github.com/SYSTRAN/faster-whisper) is used as the backend. Both GPU and CPU inference are supported.
 - LocalAgreement2 ([paper](https://aclanthology.org/2023.ijcnlp-demo.3.pdf) | [original implementation](https://github.com/ufal/whisper_streaming)) algorithm is used for real-time transcription.
 - Can be deployed using Docker (Compose configuration can be found in [compose.yaml](./compose.yaml)).
-- All configuration is done through environment variables. See [config.py](./speaches/config.py).
+- All configuration is done through environment variables. See [config.py](./faster_whisper_server/config.py).
 - NOTE: only transcription of single channel, 16000 sample rate, raw, 16-bit little-endian audio is supported.
 - NOTE: this isn't really meant to be used as a standalone tool but rather to add transcription features to other applications.
 Please create an issue if you find a bug, have a question, or a feature suggestion.
 # Quick Start
-Spinning up a `speaches` web server
+Using Docker
 ```bash
-docker run --gpus=all --publish 8000:8000 --mount type=bind,source=$HOME/.cache/huggingface,target=/root/.cache/huggingface fedirz/speaches:cuda
+docker run --gpus=all --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface fedirz/faster-whisper-server:cuda
 # or
-docker run --publish 8000:8000 --mount type=bind,source=$HOME/.cache/huggingface,target=/root/.cache/huggingface fedirz/speaches:cpu
+docker run --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface fedirz/faster-whisper-server:cpu
 ```
+Using Docker Compose
+```bash
+curl -sO https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.yaml
+docker compose up --detach faster-whisper-server-cuda
+# or
+docker compose up --detach faster-whisper-server-cpu
+```
+## Usage
 Streaming audio data from a microphone. [websocat](https://github.com/vi/websocat?tab=readme-ov-file#installation) installation is required.
 ```bash
 ffmpeg -loglevel quiet -f alsa -i default -ac 1 -ar 16000 -f s16le - | websocat --binary ws://0.0.0.0:8000/v1/audio/transcriptions
@@ -38,7 +45,7 @@
 curl -X POST -F "file=@output.raw" http://0.0.0.0:8000/v1/audio/transcriptions
 # Output: "{\"text\":\"One,  two,  three,  four,  five.\"}"%
 ```
-# Roadmap
+## Roadmap
 - [ ] Support file transcription (non-streaming) of multiple formats.
 - [ ] CLI client.
 - [ ] Separate the web server related code from the "core", and publish "core" as a package.
Taskfile.yaml
--- Taskfile.yaml
+++ Taskfile.yaml
@@ -1,6 +1,6 @@
 version: "3"
 tasks:
-  speaches: poetry run uvicorn --host 0.0.0.0 speaches.main:app {{.CLI_ARGS}}
+  server: poetry run uvicorn --host 0.0.0.0 faster_whisper_server.main:app {{.CLI_ARGS}}
   test:
     cmds:
       - poetry run pytest -o log_cli=true -o log_cli_level=DEBUG {{.CLI_ARGS}}
@@ -11,15 +11,15 @@
       - docker compose build
     sources:
       - Dockerfile.*
-      - speaches/*.py
+      - faster_whisper_server/*.py
   create-multi-arch-builder: docker buildx create --name main --driver=docker-container
   build-and-push:
     cmds:
       - docker compose build --builder main --push
     sources:
       - Dockerfile.*
-      - speaches/*.py
-  sync: lsyncd -nodaemon -delay 0 -rsyncssh . gpu-box speaches
+      - faster_whisper_server/*.py
+  sync: lsyncd -nodaemon -delay 0 -rsyncssh . gpu-box faster-whisper-server
   # Python's urllib3 takes forever when ipv6 is enabled
   # https://support.nordvpn.com/hc/en-us/articles/20164669224337-How-to-disable-IPv6-on-Linux
   disable-ipv6: sudo sysctl -w net.ipv6.conf.all.disable_ipv6=1 && sudo sysctl -w net.ipv6.conf.default.disable_ipv6=1
compose.yaml
--- compose.yaml
+++ compose.yaml
@@ -1,7 +1,7 @@
 # NOTE: arm images haven't been tested
 services:
-  speaches-cuda:
-    image: fedirz/speaches:cuda
+  faster-whisper-server-cuda:
+    image: fedirz/faster-whisper-server:cuda
     build:
       dockerfile: Dockerfile.cuda
       context: .
@@ -9,7 +9,7 @@
         - linux/amd64
         - linux/arm64
       tags:
-        - fedirz/speaches:cuda
+        - fedirz/faster-whisper-server:cuda
     volumes:
       - ~/.cache/huggingface:/root/.cache/huggingface
     restart: unless-stopped
@@ -20,8 +20,8 @@
         reservations:
           devices:
             - capabilities: ["gpu"]
-  speaches-cpu:
-    image: fedirz/speaches:cpu
+  faster-whisper-server-cpu:
+    image: fedirz/faster-whisper-server:cpu
     build:
       dockerfile: Dockerfile.cpu
       context: .
@@ -29,7 +29,7 @@
         - linux/amd64
         - linux/arm64
       tags:
-        - fedirz/speaches:cpu
+        - fedirz/faster-whisper-server:cpu
     volumes:
       - ~/.cache/huggingface:/root/.cache/huggingface
     restart: unless-stopped
faster_whisper_server/__init__.py (Renamed from speaches/__init__.py)
--- speaches/__init__.py
+++ faster_whisper_server/__init__.py
No changes
faster_whisper_server/asr.py (Renamed from speaches/asr.py)
--- speaches/asr.py
+++ faster_whisper_server/asr.py
@@ -4,9 +4,9 @@
 
 from faster_whisper import transcribe
 
-from speaches.audio import Audio
-from speaches.core import Transcription, Word
-from speaches.logger import logger
+from faster_whisper_server.audio import Audio
+from faster_whisper_server.core import Transcription, Word
+from faster_whisper_server.logger import logger
 
 
 class FasterWhisperASR:
faster_whisper_server/audio.py (Renamed from speaches/audio.py)
--- speaches/audio.py
+++ faster_whisper_server/audio.py
@@ -7,8 +7,8 @@
 import soundfile as sf
 from numpy.typing import NDArray
 
-from speaches.config import SAMPLES_PER_SECOND
-from speaches.logger import logger
+from faster_whisper_server.config import SAMPLES_PER_SECOND
+from faster_whisper_server.logger import logger
 
 
 def audio_samples_from_file(file: BinaryIO) -> NDArray[np.float32]:
faster_whisper_server/config.py (Renamed from speaches/config.py)
--- speaches/config.py
+++ faster_whisper_server/config.py
No changes
faster_whisper_server/core.py (Renamed from speaches/core.py)
--- speaches/core.py
+++ faster_whisper_server/core.py
@@ -4,7 +4,7 @@
 import re
 from dataclasses import dataclass
 
-from speaches.config import config
+from faster_whisper_server.config import config
 
 
 # TODO: use the `Segment` from `faster-whisper.transcribe` instead
faster_whisper_server/logger.py (Renamed from speaches/logger.py)
--- speaches/logger.py
+++ faster_whisper_server/logger.py
@@ -1,8 +1,8 @@
 import logging
 
-from speaches.config import config
+from faster_whisper_server.config import config
 
-# Disables all but `speaches` logger
+# Disables all but `faster_whisper_server` logger
 
 root_logger = logging.getLogger()
 root_logger.setLevel(logging.CRITICAL)
faster_whisper_server/main.py (Renamed from speaches/main.py)
--- speaches/main.py
+++ faster_whisper_server/main.py
@@ -20,16 +20,22 @@
 from faster_whisper import WhisperModel
 from faster_whisper.vad import VadOptions, get_speech_timestamps
 
-from speaches import utils
-from speaches.asr import FasterWhisperASR
-from speaches.audio import AudioStream, audio_samples_from_file
-from speaches.config import SAMPLES_PER_SECOND, Language, Model, ResponseFormat, config
-from speaches.logger import logger
-from speaches.server_models import (
+from faster_whisper_server import utils
+from faster_whisper_server.asr import FasterWhisperASR
+from faster_whisper_server.audio import AudioStream, audio_samples_from_file
+from faster_whisper_server.config import (
+    SAMPLES_PER_SECOND,
+    Language,
+    Model,
+    ResponseFormat,
+    config,
+)
+from faster_whisper_server.logger import logger
+from faster_whisper_server.server_models import (
     TranscriptionJsonResponse,
     TranscriptionVerboseJsonResponse,
 )
-from speaches.transcriber import audio_transcriber
+from faster_whisper_server.transcriber import audio_transcriber
 
 models: OrderedDict[Model, WhisperModel] = OrderedDict()
 
@@ -72,7 +78,7 @@
 
 @app.get("/health")
 def health() -> Response:
-    return Response(status_code=200, content="Everything is peachy!")
+    return Response(status_code=200, content="OK")
 
 
 @app.post("/v1/audio/translations")
faster_whisper_server/server_models.py (Renamed from speaches/server_models.py)
--- speaches/server_models.py
+++ faster_whisper_server/server_models.py
@@ -3,8 +3,8 @@
 from faster_whisper.transcribe import Segment, TranscriptionInfo, Word
 from pydantic import BaseModel
 
-from speaches import utils
-from speaches.core import Transcription
+from faster_whisper_server import utils
+from faster_whisper_server.core import Transcription
 
 
 # https://platform.openai.com/docs/api-reference/audio/json-object
faster_whisper_server/transcriber.py (Renamed from speaches/transcriber.py)
--- speaches/transcriber.py
+++ faster_whisper_server/transcriber.py
@@ -2,11 +2,16 @@
 
 from typing import AsyncGenerator
 
-from speaches.asr import FasterWhisperASR
-from speaches.audio import Audio, AudioStream
-from speaches.config import config
-from speaches.core import Transcription, Word, common_prefix, to_full_sentences
-from speaches.logger import logger
+from faster_whisper_server.asr import FasterWhisperASR
+from faster_whisper_server.audio import Audio, AudioStream
+from faster_whisper_server.config import config
+from faster_whisper_server.core import (
+    Transcription,
+    Word,
+    common_prefix,
+    to_full_sentences,
+)
+from faster_whisper_server.logger import logger
 
 
 class LocalAgreement:
faster_whisper_server/utils.py (Renamed from speaches/utils.py)
--- speaches/utils.py
+++ faster_whisper_server/utils.py
No changes
 
tests/__init__.py (deleted)
--- tests/__init__.py
@@ -1,0 +0,0 @@
tests/app_test.py
--- tests/app_test.py
+++ tests/app_test.py
@@ -10,9 +10,9 @@
 from fastapi.testclient import TestClient
 from starlette.testclient import WebSocketTestSession
 
-from speaches.config import BYTES_PER_SECOND
-from speaches.main import app
-from speaches.server_models import TranscriptionVerboseJsonResponse
+from faster_whisper_server.config import BYTES_PER_SECOND
+from faster_whisper_server.main import app
+from faster_whisper_server.server_models import TranscriptionVerboseJsonResponse
 
 SIMILARITY_THRESHOLD = 0.97
 AUDIO_FILES_LIMIT = 5
Add a comment
List