Commit @8ad3023b75c1d66936702ad8c846fb986b12ce32 - yjyoon/whisper_server

Fedir Zadniprovskyi 2024-05-20

chore: Dockerfile envs, log ws close, etc.

@8ad3023b75c1d66936702ad8c846fb986b12ce32

109bee8

8ad3023

Dockerfile.cpu

--- Dockerfile.cpu

+++ Dockerfile.cpu

...	...	@@ -12,3 +12,5 @@
12	12	COPY ./speaches ./speaches
13	13	ENTRYPOINT ["poetry", "run"]
14	14	CMD ["uvicorn", "speaches.main:app"]
	15	+ENV MODEL_SIZE=distil-small.en
	16	+ENV DEVICE=cpu

109bee8

8ad3023

Dockerfile.cuda

--- Dockerfile.cuda

+++ Dockerfile.cuda

...	...	@@ -12,3 +12,5 @@
12	12	COPY ./speaches ./speaches
13	13	ENTRYPOINT ["poetry", "run"]
14	14	CMD ["uvicorn", "speaches.main:app"]
	15	+ENV MODEL_SIZE=distil-medium.en
	16	+ENV DEVICE=cuda

109bee8

8ad3023

Taskfile.yaml

--- Taskfile.yaml

+++ Taskfile.yaml


       - "**/*.py"
   build-and-push:
     cmds:
-      - docker compose build --push speaches
+      - docker compose build --push
     sources:
-      - Dockerfile
+      - Dockerfile.*
       - speaches/*.py
   sync: lsyncd -nodaemon -delay 0 -rsyncssh . gpu-box speaches

109bee8

8ad3023

speaches/config.py

--- speaches/config.py

+++ speaches/config.py


 # 1 SECOND OF AUDIO = 32000 BYTES = 16000 SAMPLES
 
 
-# TODO: confirm names
+# https://huggingface.co/Systran
 class Model(enum.StrEnum):
     TINY_EN = "tiny.en"
     TINY = "tiny"

109bee8

8ad3023

speaches/main.py

--- speaches/main.py

+++ speaches/main.py


 from io import BytesIO
 from typing import Annotated
 
-from fastapi import (
-    Depends,
-    FastAPI,
-    Response,
-    UploadFile,
-    WebSocket,
-    WebSocketDisconnect,
-)
+from fastapi import (Depends, FastAPI, Response, UploadFile, WebSocket,
+                     WebSocketDisconnect)
 from fastapi.websockets import WebSocketState
 from faster_whisper import WhisperModel
 from faster_whisper.vad import VadOptions, get_speech_timestamps

 from speaches.config import SAMPLES_PER_SECOND, Language, config
 from speaches.core import Transcription
 from speaches.logger import logger
-from speaches.server_models import (
-    ResponseFormat,
-    TranscriptionResponse,
-    TranscriptionVerboseResponse,
-)
+from speaches.server_models import (ResponseFormat, TranscriptionResponse,
+                                    TranscriptionVerboseResponse)
 from speaches.transcriber import audio_transcriber
 
 whisper: WhisperModel = None  # type: ignore

             await ws.send_text(format_transcription(transcription, response_format))
 
     if not ws.client_state == WebSocketState.DISCONNECTED:
-        # this means that the client HASNT disconnected
+        logger.info("Closing the connection.")
         await ws.close()

Add a comment

Open 0
Closed 0

List

...	...	@@ -8,8 +8,8 @@
8	8	- "**/*.py"
9	9	build-and-push:
10	10	cmds:
11		- - docker compose build --push speaches
	11	+ - docker compose build --push
12	12	sources:
13		- - Dockerfile
	13	+ - Dockerfile.*
14	14	- speaches/*.py
15	15	sync: lsyncd -nodaemon -delay 0 -rsyncssh . gpu-box speaches

...	...	@@ -10,7 +10,7 @@
10	10	# 1 SECOND OF AUDIO = 32000 BYTES = 16000 SAMPLES
11	11
12	12
13		-# TODO: confirm names
	13	+# https://huggingface.co/Systran
14	14	class Model(enum.StrEnum):
15	15	TINY_EN = "tiny.en"
16	16	TINY = "tiny"

...	...	@@ -7,14 +7,8 @@
7	7	from io import BytesIO
8	8	from typing import Annotated
9	9
10		-from fastapi import (
11		- Depends,
12		- FastAPI,
13		- Response,
14		- UploadFile,
15		- WebSocket,
16		- WebSocketDisconnect,
17		-)
	10	+from fastapi import (Depends, FastAPI, Response, UploadFile, WebSocket,
	11	+ WebSocketDisconnect)
18	12	from fastapi.websockets import WebSocketState
19	13	from faster_whisper import WhisperModel
20	14	from faster_whisper.vad import VadOptions, get_speech_timestamps
...	...	@@ -24,11 +18,8 @@
24	18	from speaches.config import SAMPLES_PER_SECOND, Language, config
25	19	from speaches.core import Transcription
26	20	from speaches.logger import logger
27		-from speaches.server_models import (
28		- ResponseFormat,
29		- TranscriptionResponse,
30		- TranscriptionVerboseResponse,
31		-)
	21	+from speaches.server_models import (ResponseFormat, TranscriptionResponse,
	22	+ TranscriptionVerboseResponse)
32	23	from speaches.transcriber import audio_transcriber
33	24
34	25	whisper: WhisperModel = None # type: ignore
...	...	@@ -158,5 +149,5 @@
158	149	await ws.send_text(format_transcription(transcription, response_format))
159	150
160	151	if not ws.client_state == WebSocketState.DISCONNECTED:
161		- # this means that the client HASNT disconnected
	152	+ logger.info("Closing the connection.")
162	153	await ws.close()

Delete comment