

feat: reorganize docker files
@5ad8603166d89907f7737e2f81f1a45a04d7c630
--- .github/workflows/docker-build-and-push.yaml
+++ .github/workflows/docker-build-and-push.yaml
@@ -13,11 +13,12 @@
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        dockerfile: [Dockerfile.cuda, Dockerfile.cpu]
+        # https://hub.docker.com/r/nvidia/cuda/tags
+        base-image: ['nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04', 'ubuntu:24.04']
         include:
-          - dockerfile: Dockerfile.cuda
+          - base-image: nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
             tag-suffix: -cuda
-          - dockerfile: Dockerfile.cpu
+          - base-image: ubuntu:24.04
             tag-suffix: -cpu
     steps:
       - uses: actions/checkout@v4
@@ -45,7 +46,9 @@
         uses: docker/build-push-action@v6
         with:
           context: .
-          file: ${{ matrix.dockerfile }}
+          file: Dockerfile
+          build-args: |
+            BASE_IMAGE=${{ matrix.base-image }}
           push: true
           platforms: linux/amd64,linux/arm64
           tags: ${{ steps.meta.outputs.tags }}
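For reference, each leg of the matrix now expands to a single parameterized build. A rough local equivalent of the CPU leg, sketched under the assumption that a buildx builder is available (the real tags come from `steps.meta.outputs.tags`; the tag below is illustrative):

```bash
# local approximation of the CPU matrix leg; multi-platform output needs --push or a registry
docker buildx build \
  --file Dockerfile \
  --build-arg BASE_IMAGE=ubuntu:24.04 \
  --platform linux/amd64,linux/arm64 \
  --tag fedirz/faster-whisper-server:latest-cpu \
  .
```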
+++ Dockerfile
@@ -0,0 +1,28 @@
+ARG BASE_IMAGE=nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
+FROM ${BASE_IMAGE}
+LABEL org.opencontainers.image.source="https://github.com/fedirz/faster-whisper-server"
+# `ffmpeg` is installed because without it `gradio` won't work with mp3 (and possibly other) files
+# hadolint ignore=DL3008
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ffmpeg python3.12 && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+USER ubuntu
+ENV HOME=/home/ubuntu \
+    PATH=/home/ubuntu/.local/bin:$PATH
+WORKDIR $HOME/faster-whisper-server
+COPY --chown=ubuntu --from=ghcr.io/astral-sh/uv:0.5.11 /uv /bin/uv
+# https://docs.astral.sh/uv/guides/integration/docker/#intermediate-layers
+# https://docs.astral.sh/uv/guides/integration/docker/#compiling-bytecode
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,source=uv.lock,target=uv.lock \
+    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+    uv sync --frozen --compile-bytecode --no-install-project
+COPY --chown=ubuntu ./src ./pyproject.toml ./uv.lock ./
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --frozen --compile-bytecode --extra ui --extra opentelemetry
+ENV WHISPER__MODEL=Systran/faster-whisper-large-v3
+ENV UVICORN_HOST=0.0.0.0
+ENV UVICORN_PORT=8000
+EXPOSE 8000
+CMD ["uv", "run", "opentelemetry-instrument", "uvicorn", "--factory", "faster_whisper_server.main:create_app"]
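With the variant reduced to a build argument, both images come out of this single Dockerfile. A minimal sketch of the two local builds (tag names are illustrative; BuildKit is required for the `--mount` instructions):

```bash
# CUDA variant: BASE_IMAGE already defaults to the CUDA image
docker build --tag faster-whisper-server:cuda .
# CPU variant: swap the base image at build time
docker build --build-arg BASE_IMAGE=ubuntu:24.04 --tag faster-whisper-server:cpu .
```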
--- Dockerfile.cpu
@@ -1,22 +0,0 @@
-FROM ubuntu:24.04
-LABEL org.opencontainers.image.source="https://github.com/fedirz/faster-whisper-server"
-# `ffmpeg` is installed because without it `gradio` won't work with mp3(possible others as well) files
-# hadolint ignore=DL3008
-RUN apt-get update && \
-    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ffmpeg python3.12 && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-COPY --from=ghcr.io/astral-sh/uv:0.5.11 /uv /bin/uv
-WORKDIR /root/faster-whisper-server
-# https://docs.astral.sh/uv/guides/integration/docker/#intermediate-layers
-RUN --mount=type=cache,target=/root/.cache/uv \
-    --mount=type=bind,source=uv.lock,target=uv.lock \
-    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
-    uv sync --frozen --no-install-project
-COPY ./src ./pyproject.toml ./uv.lock ./
-RUN --mount=type=cache,target=/root/.cache/uv \
-    uv sync --frozen --extra ui
-ENV WHISPER__MODEL=Systran/faster-whisper-small
-ENV UVICORN_HOST=0.0.0.0
-ENV UVICORN_PORT=8000
-CMD ["uv", "run", "uvicorn", "--factory", "faster_whisper_server.main:create_app"]
--- Dockerfile.cuda
@@ -1,22 +0,0 @@
-FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04
-LABEL org.opencontainers.image.source="https://github.com/fedirz/faster-whisper-server"
-# `ffmpeg` is installed because without it `gradio` won't work with mp3(possible others as well) files
-# hadolint ignore=DL3008
-RUN apt-get update && \
-    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ffmpeg python3.12 && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-COPY --from=ghcr.io/astral-sh/uv:0.5.11 /uv /bin/uv
-WORKDIR /root/faster-whisper-server
-# https://docs.astral.sh/uv/guides/integration/docker/#intermediate-layers
-RUN --mount=type=cache,target=/root/.cache/uv \
-    --mount=type=bind,source=uv.lock,target=uv.lock \
-    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
-    uv sync --frozen --no-install-project
-COPY ./src ./pyproject.toml ./uv.lock ./
-RUN --mount=type=cache,target=/root/.cache/uv \
-    uv sync --frozen --extra ui --extra opentelemetry
-ENV WHISPER__MODEL=Systran/faster-whisper-large-v3
-ENV UVICORN_HOST=0.0.0.0
-ENV UVICORN_PORT=8000
-CMD ["uv", "run", "opentelemetry-instrument", "uvicorn", "--factory", "faster_whisper_server.main:create_app"]
--- README.md
+++ README.md
@@ -25,18 +25,26 @@
 
 
 
+Using Docker Compose (Recommended)
+NOTE: I'm using newer Docker Compose features. If you are using an older version of Docker Compose, you may need to update.
+
+```bash
+curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.yaml
+
+# for GPU support
+curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.cuda.yaml
+docker compose --file compose.cuda.yaml up --detach
+# for CPU only (use this if you don't have a GPU, as the image is much smaller)
+curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.cpu.yaml
+docker compose --file compose.cpu.yaml up --detach
+```
+
 Using Docker
 ```bash
-docker run --gpus=all --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface fedirz/faster-whisper-server:latest-cuda
-# or
-docker run --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface fedirz/faster-whisper-server:latest-cpu
-```
-Using Docker Compose
-```bash
-curl -sO https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.yaml
-docker compose up --detach faster-whisper-server-cuda
-# or
-docker compose up --detach faster-whisper-server-cpu
+# for GPU support
+docker run --gpus=all --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --detach fedirz/faster-whisper-server:latest-cuda
+# for CPU only (use this if you don't have a GPU, as the image is much smaller)
+docker run --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --env WHISPER__MODEL=Systran/faster-whisper-small --detach fedirz/faster-whisper-server:latest-cpu
 ```
 
 Using Kubernetes: [tutorial](https://substratus.ai/blog/deploying-faster-whisper-on-k8s)
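Whichever route starts the container, a quick smoke test against the server's OpenAI-compatible API confirms it is serving. A sketch, assuming the transcription endpoint lives at `/v1/audio/transcriptions` and with `audio.wav` standing in for any local audio file:

```bash
# send a local file for transcription to the container started above
curl http://localhost:8000/v1/audio/transcriptions --form file=@audio.wav
```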
--- Taskfile.yaml
+++ Taskfile.yaml
@@ -11,19 +11,7 @@
       - pytest -o log_cli=true -o log_cli_level=DEBUG {{.CLI_ARGS}}
     sources:
       - src/**/*.py
-  build:
-    cmds:
-      - docker compose build
-    sources:
-      - Dockerfile.*
-      - src/**/*.py
   create-multi-arch-builder: docker buildx create --name main --driver=docker-container
-  docker-build:
-    cmds:
-      - docker compose build --builder main {{.CLI_ARGS}}
-    sources:
-      - Dockerfile.*
-      - src/faster_whisper_server/*.py
   cii:
     cmds:
       - act --rm --action-offline-mode --secret-file .secrets {{.CLI_ARGS}}
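With `build` and `docker-build` removed, the multi-arch flow reduces to the remaining task plus a direct Compose invocation. A sketch, assuming a Compose version that supports `--builder`:

```bash
# one-time: create the docker-container builder the removed task pointed at
task create-multi-arch-builder
# then build against it directly
docker compose --file compose.cpu.yaml build --builder main
```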
+++ compose.cpu.yaml
@@ -0,0 +1,17 @@
+# include:
+#   - compose.observability.yaml
+services:
+  faster-whisper-server:
+    extends:
+      file: compose.yaml
+      service: faster-whisper-server
+    image: fedirz/faster-whisper-server:latest-cpu
+    build:
+      args:
+        BASE_IMAGE: ubuntu:24.04
+    environment:
+      - WHISPER__MODEL=Systran/faster-whisper-small
+    volumes:
+      - hugging_face_cache:/root/.cache/huggingface
+volumes:
+  hugging_face_cache:
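Since this file `extends` the shared service in `compose.yaml`, the effective configuration is the merge of both files; Compose can print the resolved result:

```bash
# render the fully merged service definition produced by extends
docker compose --file compose.cpu.yaml config
```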
+++ compose.cuda-cdi.yaml
@@ -0,0 +1,24 @@
+# include:
+#   - compose.observability.yaml
+# This file is for those who have the CDI Docker feature enabled
+# https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html
+# https://docs.docker.com/reference/cli/dockerd/#enable-cdi-devices
+services:
+  faster-whisper-server:
+    extends:
+      file: compose.cuda.yaml
+      service: faster-whisper-server
+    volumes:
+      - hugging_face_cache:/root/.cache/huggingface
+    deploy:
+      resources:
+        reservations:
+          # WARN: requires Docker Compose 2.24.2
+          # https://docs.docker.com/reference/compose-file/merge/#replace-value
+          devices: !override
+            - capabilities: ["gpu"]
+              driver: cdi
+              device_ids:
+                - nvidia.com/gpu=all
+volumes:
+  hugging_face_cache:
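Per the dockerd documentation linked above, CDI must be enabled in the daemon before this override is usable. A sketch of the host-side steps (the `features.cdi` daemon flag is taken from Docker's docs; verify it against your Docker version):

```bash
# /etc/docker/daemon.json should contain: { "features": { "cdi": true } }
sudo systemctl restart docker
docker compose --file compose.cuda-cdi.yaml up --detach
```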
+++ compose.cuda.yaml
@@ -0,0 +1,22 @@
+# include:
+#   - compose.observability.yaml
+services:
+  faster-whisper-server:
+    extends:
+      file: compose.yaml
+      service: faster-whisper-server
+    image: fedirz/faster-whisper-server:latest-cuda
+    build:
+      args:
+        BASE_IMAGE: nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
+    environment:
+      - WHISPER__MODEL=Systran/faster-whisper-large-v3
+    volumes:
+      - hugging_face_cache:/root/.cache/huggingface
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - capabilities: ["gpu"]
+volumes:
+  hugging_face_cache:
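A quick way to check that the GPU reservation reaches the container, assuming the NVIDIA Container Toolkit is installed on the host (`nvidia-smi` is injected into the container by the NVIDIA runtime rather than shipped in the image):

```bash
docker compose --file compose.cuda.yaml up --detach
docker compose --file compose.cuda.yaml exec faster-whisper-server nvidia-smi
```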
--- observability-compose.yaml
+++ compose.observability.yaml
No changes
--- compose.yaml
+++ compose.yaml
@@ -1,11 +1,9 @@
 # TODO: https://docs.astral.sh/uv/guides/integration/docker/#configuring-watch-with-docker-compose
-include:
-  - observability-compose.yaml
 services:
-  faster-whisper-server-cuda:
-    image: fedirz/faster-whisper-server:latest-cuda
+  faster-whisper-server:
+    container_name: faster-whisper-server
     build:
-      dockerfile: Dockerfile.cuda
+      dockerfile: Dockerfile
       context: .
       platforms:
         - linux/amd64
@@ -13,39 +11,7 @@
     restart: unless-stopped
     ports:
       - 8000:8000
-    volumes:
-      - hugging_face_cache:/root/.cache/huggingface
     develop:
       watch:
         - path: faster_whisper_server
           action: rebuild
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - capabilities: ["gpu"]
-          # If you have CDI feature enabled use the following instead
-          # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html
-          # https://docs.docker.com/reference/cli/dockerd/#enable-cdi-devices
-          # - driver: cdi
-          #   device_ids:
-          #     - nvidia.com/gpu=all
-  faster-whisper-server-cpu:
-    image: fedirz/faster-whisper-server:latest-cpu
-    build:
-      dockerfile: Dockerfile.cpu
-      context: .
-      platforms:
-        - linux/amd64
-        - linux/arm64
-    restart: unless-stopped
-    ports:
-      - 8000:8000
-    volumes:
-      - hugging_face_cache:/root/.cache/huggingface
-    develop:
-      watch:
-        - path: faster_whisper_server
-          action: rebuild
-volumes:
-  hugging_face_cache:
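The retained `develop.watch` section pairs with Compose's watch mode; a sketch of the resulting dev loop, assuming Docker Compose 2.22 or newer:

```bash
# rebuild the image whenever the watched path changes
docker compose up --watch
```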