Fedir Zadniprovskyi 2024-05-27
chore: misc
@47627a910737768962d95354783277cfbf7e6c3c
.github/workflows/docker-build-and-push.yaml
--- .github/workflows/docker-build-and-push.yaml
+++ .github/workflows/docker-build-and-push.yaml
@@ -16,9 +16,9 @@
         dockerfile: [Dockerfile.cuda, Dockerfile.cpu]
         include:
           - dockerfile: Dockerfile.cuda
-            tag-prefix: cuda-
+            tag-suffix: -cuda
           - dockerfile: Dockerfile.cpu
-            tag-prefix: cpu-
+            tag-suffix: -cpu
     steps:
       - uses: actions/checkout@v4
       - name: Login to Docker Hub
@@ -33,7 +33,8 @@
             fedirz/faster-whisper-server
           # https://github.com/docker/metadata-action?tab=readme-ov-file#flavor-input
           flavor: |
-            prefix=${{ matrix.tag-prefix }}
+            latest=false
+            suffix=${{ matrix.tag-suffix }}
           tags: |
             type=semver,pattern={{version}}
             type=semver,pattern={{major}}.{{minor}}
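The net effect of the flavor change: image tags move from a `cuda-`/`cpu-` prefix to a `-cuda`/`-cpu` suffix, and `latest=false` disables the automatic `latest` tag the action would otherwise add on semver releases. A rough Python model of how the two semver patterns combine with the suffix (illustrative only, not docker/metadata-action's implementation):

```python
# Rough model of docker/metadata-action's semver tag patterns combined
# with the `suffix` flavor input; illustrative, not the action's code.
def docker_tags(version: str, suffix: str, image: str = "fedirz/faster-whisper-server") -> list[str]:
    major, minor, _patch = version.split(".")
    patterns = [version, f"{major}.{minor}"]  # {{version}} and {{major}}.{{minor}}
    return [f"{image}:{pattern}{suffix}" for pattern in patterns]

print(docker_tags("1.2.3", "-cuda"))
# ['fedirz/faster-whisper-server:1.2.3-cuda', 'fedirz/faster-whisper-server:1.2-cuda']
```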
Dockerfile.cpu
--- Dockerfile.cpu
+++ Dockerfile.cpu
@@ -15,7 +15,7 @@
 COPY ./faster_whisper_server ./faster_whisper_server
 ENTRYPOINT ["poetry", "run"]
 CMD ["uvicorn", "faster_whisper_server.main:app"]
-ENV WHISPER_MODEL=distil-medium.en
+ENV WHISPER_MODEL=medium.en
 ENV WHISPER_INFERENCE_DEVICE=cpu
 ENV WHISPER_COMPUTE_TYPE=int8
 ENV UVICORN_HOST=0.0.0.0
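For reference, the three WHISPER_* values above map to a direct faster-whisper load roughly like this (a sketch of the equivalent call, not the server's actual code path):

```python
# Rough equivalent of WHISPER_MODEL=medium.en, WHISPER_INFERENCE_DEVICE=cpu,
# and WHISPER_COMPUTE_TYPE=int8, using faster-whisper directly.
from faster_whisper import WhisperModel

model = WhisperModel("medium.en", device="cpu", compute_type="int8")
segments, _info = model.transcribe("audio.wav", language="en")
for segment in segments:
    print(segment.text)
```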
README.md
--- README.md
+++ README.md
@@ -60,10 +60,10 @@
 # If `model` isn't specified, the default model is used
 curl http://localhost:8000/v1/audio/transcriptions -F "file=@audio.wav"
 curl http://localhost:8000/v1/audio/transcriptions -F "file=@audio.mp3"
-curl http://localhost:8000/v1/audio/transcriptions -F "file=@audio.wav" -F "streaming=true"
-curl http://localhost:8000/v1/audio/transcriptions -F "file=@audio.wav" -F "streaming=true" -F "model=distil-large-v3"
+curl http://localhost:8000/v1/audio/transcriptions -F "file=@audio.wav" -F "stream=true"
+curl http://localhost:8000/v1/audio/transcriptions -F "file=@audio.wav" -F "stream=true" -F "model=distil-large-v3"
 # It's recommended that you always specify the language as that will reduce the transcription time
-curl http://localhost:8000/v1/audio/transcriptions -F "file=@audio.wav" -F "streaming=true" -F "model=distil-large-v3" -F "language=en"
+curl http://localhost:8000/v1/audio/transcriptions -F "file=@audio.wav" -F "stream=true" -F "model=distil-large-v3" -F "language=en"
 
 curl http://localhost:8000/v1/audio/translations -F "file=@audio.wav"
 ```
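The form field is `stream`, not `streaming`, which is what this hunk corrects. A minimal Python client for the fixed field, assuming the endpoint writes transcription text incrementally to a streamed response (httpx is just one choice of client):

```python
# Minimal streaming-client sketch for the `stream` form field; assumes
# the endpoint emits incremental text chunks in the response body.
import httpx

with open("audio.wav", "rb") as f, httpx.stream(
    "POST",
    "http://localhost:8000/v1/audio/transcriptions",
    files={"file": ("audio.wav", f)},
    data={"stream": "true", "model": "distil-large-v3", "language": "en"},
    timeout=None,
) as response:
    for chunk in response.iter_text():
        print(chunk, end="", flush=True)
```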
faster_whisper_server/config.py
--- faster_whisper_server/config.py
+++ faster_whisper_server/config.py
@@ -163,7 +163,7 @@
 
 
 class WhisperConfig(BaseModel):
-    model: Model = Field(default=Model.DISTIL_MEDIUM_EN)
+    model: Model = Field(default=Model.MEDIUM_EN)
     inference_device: Device = Field(default=Device.AUTO)
     compute_type: Quantization = Field(default=Quantization.DEFAULT)
 
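This default and the Dockerfile's WHISPER_MODEL variable are two sides of the same setting: the env value, when present, overrides Field(default=...). A minimal sketch of that mapping, assuming a pydantic-settings setup with a nested-env delimiter (hypothetical names; not the project's actual config module):

```python
# Hypothetical sketch of env-to-config mapping; assumes pydantic-settings
# with a nested delimiter so WHISPER_MODEL lands on whisper.model.
from pydantic import BaseModel, Field
from pydantic_settings import BaseSettings, SettingsConfigDict

class WhisperConfig(BaseModel):
    model: str = Field(default="medium.en")

class Config(BaseSettings):
    model_config = SettingsConfigDict(env_nested_delimiter="_")
    whisper: WhisperConfig = WhisperConfig()

# WHISPER_MODEL=distil-medium.en python app.py  ->  "distil-medium.en"
print(Config().whisper.model)
```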
faster_whisper_server/main.py
--- faster_whisper_server/main.py
+++ faster_whisper_server/main.py
@@ -58,7 +58,7 @@
         compute_type=config.whisper.compute_type,
     )
     logger.info(
-        f"Loaded {model_name} loaded in {time.perf_counter() - start:.2f} seconds"
+        f"Loaded {model_name} loaded in {time.perf_counter() - start:.2f} seconds. {config.whisper.inference_device}({config.whisper.compute_type}) will be used for inference."
     )
     models[model_name] = whisper
     return whisper
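For context, this log line sits inside a load-once cache: the first request for a model pays the load cost, and later requests reuse the instance stored in `models`. A condensed sketch of the pattern (the function name and hard-coded arguments are illustrative, not the module's exact API):

```python
# Condensed sketch of the load-once model cache around this log line;
# the function name and config values are illustrative.
import time

from faster_whisper import WhisperModel

models: dict[str, WhisperModel] = {}

def get_model(model_name: str) -> WhisperModel:
    if model_name in models:
        return models[model_name]
    start = time.perf_counter()
    whisper = WhisperModel(model_name, device="auto", compute_type="default")
    print(f"Loaded {model_name} in {time.perf_counter() - start:.2f} seconds")
    models[model_name] = whisper
    return whisper
```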