Fedir Zadniprovskyi 2024-05-20
chore: Dockerfile envs, log ws close, etc.
@8ad3023b75c1d66936702ad8c846fb986b12ce32
Dockerfile.cpu
--- Dockerfile.cpu
+++ Dockerfile.cpu
@@ -12,3 +12,5 @@
 COPY ./speaches ./speaches
 ENTRYPOINT ["poetry", "run"]
 CMD ["uvicorn", "speaches.main:app"]
+ENV MODEL_SIZE=distil-small.en
+ENV DEVICE=cpu
Dockerfile.cuda
--- Dockerfile.cuda
+++ Dockerfile.cuda
@@ -12,3 +12,5 @@
 COPY ./speaches ./speaches
 ENTRYPOINT ["poetry", "run"]
 CMD ["uvicorn", "speaches.main:app"]
+ENV MODEL_SIZE=distil-medium.en
+ENV DEVICE=cuda
Taskfile.yaml
--- Taskfile.yaml
+++ Taskfile.yaml
@@ -8,8 +8,8 @@
       - "**/*.py"
   build-and-push:
     cmds:
-      - docker compose build --push speaches
+      - docker compose build --push
     sources:
-      - Dockerfile
+      - Dockerfile.*
       - speaches/*.py
   sync: lsyncd -nodaemon -delay 0 -rsyncssh . gpu-box speaches
speaches/config.py
--- speaches/config.py
+++ speaches/config.py
@@ -10,7 +10,7 @@
 # 1 SECOND OF AUDIO = 32000 BYTES = 16000 SAMPLES
 
 
-# TODO: confirm names
+# https://huggingface.co/Systran
 class Model(enum.StrEnum):
     TINY_EN = "tiny.en"
     TINY = "tiny"
speaches/main.py
--- speaches/main.py
+++ speaches/main.py
@@ -7,14 +7,8 @@
 from io import BytesIO
 from typing import Annotated
 
-from fastapi import (
-    Depends,
-    FastAPI,
-    Response,
-    UploadFile,
-    WebSocket,
-    WebSocketDisconnect,
-)
+from fastapi import (Depends, FastAPI, Response, UploadFile, WebSocket,
+                     WebSocketDisconnect)
 from fastapi.websockets import WebSocketState
 from faster_whisper import WhisperModel
 from faster_whisper.vad import VadOptions, get_speech_timestamps
@@ -24,11 +18,8 @@
 from speaches.config import SAMPLES_PER_SECOND, Language, config
 from speaches.core import Transcription
 from speaches.logger import logger
-from speaches.server_models import (
-    ResponseFormat,
-    TranscriptionResponse,
-    TranscriptionVerboseResponse,
-)
+from speaches.server_models import (ResponseFormat, TranscriptionResponse,
+                                    TranscriptionVerboseResponse)
 from speaches.transcriber import audio_transcriber
 
 whisper: WhisperModel = None  # type: ignore
@@ -158,5 +149,5 @@
             await ws.send_text(format_transcription(transcription, response_format))
 
     if not ws.client_state == WebSocketState.DISCONNECTED:
-        # this means that the client HASNT disconnected
+        logger.info("Closing the connection.")
         await ws.close()
Add a comment
List