

feat: reorganize docker files
@5ad8603166d89907f7737e2f81f1a45a04d7c630
--- .github/workflows/docker-build-and-push.yaml
+++ .github/workflows/docker-build-and-push.yaml
@@ -13,11 +13,12 @@
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        dockerfile: [Dockerfile.cuda, Dockerfile.cpu]
+        # https://hub.docker.com/r/nvidia/cuda/tags
+        base-image: ['nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04', 'ubuntu:24.04']
         include:
-          - dockerfile: Dockerfile.cuda
+          - base-image: nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
             tag-suffix: -cuda
-          - dockerfile: Dockerfile.cpu
+          - base-image: ubuntu:24.04
             tag-suffix: -cpu
     steps:
       - uses: actions/checkout@v4
@@ -45,7 +46,9 @@
         uses: docker/build-push-action@v6
         with:
           context: .
-          file: ${{ matrix.dockerfile }}
+          file: Dockerfile
+          build-args: |
+            BASE_IMAGE=${{ matrix.base-image }}
           push: true
           platforms: linux/amd64,linux/arm64
           tags: ${{ steps.meta.outputs.tags }}
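For reference, each leg of the matrix now expands to a single parameterized build. A rough local equivalent of the CPU leg, sketched under the assumption that a buildx builder is available (the real tags come from `steps.meta.outputs.tags`; the tag below is illustrative):

```bash
# local approximation of the CPU matrix leg; multi-platform output needs --push or a registry
docker buildx build \
  --file Dockerfile \
  --build-arg BASE_IMAGE=ubuntu:24.04 \
  --platform linux/amd64,linux/arm64 \
  --tag fedirz/faster-whisper-server:latest-cpu \
  .
```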
+++ Dockerfile
@@ -0,0 +1,28 @@
+ARG BASE_IMAGE=nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
+FROM ${BASE_IMAGE}
+LABEL org.opencontainers.image.source="https://github.com/fedirz/faster-whisper-server"
+# `ffmpeg` is installed because without it `gradio` won't work with mp3 (and possibly other) files
+# hadolint ignore=DL3008
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ffmpeg python3.12 && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+USER ubuntu
+ENV HOME=/home/ubuntu \
+    PATH=/home/ubuntu/.local/bin:$PATH
+WORKDIR $HOME/faster-whisper-server
+COPY --chown=ubuntu --from=ghcr.io/astral-sh/uv:0.5.11 /uv /bin/uv
+# https://docs.astral.sh/uv/guides/integration/docker/#intermediate-layers
+# https://docs.astral.sh/uv/guides/integration/docker/#compiling-bytecode
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,source=uv.lock,target=uv.lock \
+    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+    uv sync --frozen --compile-bytecode --no-install-project
+COPY --chown=ubuntu ./src ./pyproject.toml ./uv.lock ./
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --frozen --compile-bytecode --extra ui --extra opentelemetry
+ENV WHISPER__MODEL=Systran/faster-whisper-large-v3
+ENV UVICORN_HOST=0.0.0.0
+ENV UVICORN_PORT=8000
+EXPOSE 8000
+CMD ["uv", "run", "opentelemetry-instrument", "uvicorn", "--factory", "faster_whisper_server.main:create_app"]
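With the variant reduced to a build argument, both images come out of this single Dockerfile. A minimal sketch of the two local builds (tag names are illustrative; BuildKit is required for the `--mount` instructions):

```bash
# CUDA variant: BASE_IMAGE already defaults to the CUDA image
docker build --tag faster-whisper-server:cuda .
# CPU variant: swap the base image at build time
docker build --build-arg BASE_IMAGE=ubuntu:24.04 --tag faster-whisper-server:cpu .
```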
--- Dockerfile.cpu
@@ -1,22 +0,0 @@
-FROM ubuntu:24.04
-LABEL org.opencontainers.image.source="https://github.com/fedirz/faster-whisper-server"
-# `ffmpeg` is installed because without it `gradio` won't work with mp3(possible others as well) files
-# hadolint ignore=DL3008
-RUN apt-get update && \
-    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ffmpeg python3.12 && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-COPY --from=ghcr.io/astral-sh/uv:0.5.11 /uv /bin/uv
-WORKDIR /root/faster-whisper-server
-# https://docs.astral.sh/uv/guides/integration/docker/#intermediate-layers
-RUN --mount=type=cache,target=/root/.cache/uv \
-    --mount=type=bind,source=uv.lock,target=uv.lock \
-    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
-    uv sync --frozen --no-install-project
-COPY ./src ./pyproject.toml ./uv.lock ./
-RUN --mount=type=cache,target=/root/.cache/uv \
-    uv sync --frozen --extra ui
-ENV WHISPER__MODEL=Systran/faster-whisper-small
-ENV UVICORN_HOST=0.0.0.0
-ENV UVICORN_PORT=8000
-CMD ["uv", "run", "uvicorn", "--factory", "faster_whisper_server.main:create_app"]
--- Dockerfile.cuda
@@ -1,22 +0,0 @@
-FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04
-LABEL org.opencontainers.image.source="https://github.com/fedirz/faster-whisper-server"
-# `ffmpeg` is installed because without it `gradio` won't work with mp3(possible others as well) files
-# hadolint ignore=DL3008
-RUN apt-get update && \
-    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ffmpeg python3.12 && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-COPY --from=ghcr.io/astral-sh/uv:0.5.11 /uv /bin/uv
-WORKDIR /root/faster-whisper-server
-# https://docs.astral.sh/uv/guides/integration/docker/#intermediate-layers
-RUN --mount=type=cache,target=/root/.cache/uv \
-    --mount=type=bind,source=uv.lock,target=uv.lock \
-    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
-    uv sync --frozen --no-install-project
-COPY ./src ./pyproject.toml ./uv.lock ./
-RUN --mount=type=cache,target=/root/.cache/uv \
-    uv sync --frozen --extra ui --extra opentelemetry
-ENV WHISPER__MODEL=Systran/faster-whisper-large-v3
-ENV UVICORN_HOST=0.0.0.0
-ENV UVICORN_PORT=8000
-CMD ["uv", "run", "opentelemetry-instrument", "uvicorn", "--factory", "faster_whisper_server.main:create_app"]
--- README.md
+++ README.md
@@ -25,18 +25,26 @@
 
 
 
+Using Docker Compose (Recommended)
+NOTE: I'm using newer Docker Compose features. If you are using an older version of Docker Compose, you may need to update.
+
+```bash
+curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.yaml
+
+# for GPU support
+curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.cuda.yaml
+docker compose --file compose.cuda.yaml up --detach
+# for CPU only (use this if you don't have a GPU, as the image is much smaller)
+curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.cpu.yaml
+docker compose --file compose.cpu.yaml up --detach
+```
+
 Using Docker
 ```bash
-docker run --gpus=all --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface fedirz/faster-whisper-server:latest-cuda
-# or
-docker run --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface fedirz/faster-whisper-server:latest-cpu
-```
-Using Docker Compose
-```bash
-curl -sO https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.yaml
-docker compose up --detach faster-whisper-server-cuda
-# or
-docker compose up --detach faster-whisper-server-cpu
+# for GPU support
+docker run --gpus=all --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --detach fedirz/faster-whisper-server:latest-cuda
+# for CPU only (use this if you don't have a GPU, as the image is much smaller)
+docker run --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --env WHISPER__MODEL=Systran/faster-whisper-small --detach fedirz/faster-whisper-server:latest-cpu
 ```
 
 Using Kubernetes: [tutorial](https://substratus.ai/blog/deploying-faster-whisper-on-k8s)
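Whichever route starts the container, a quick smoke test against the server's OpenAI-compatible API confirms it is serving. A sketch, assuming the transcription endpoint lives at `/v1/audio/transcriptions` and with `audio.wav` standing in for any local audio file:

```bash
# send a local file for transcription to the container started above
curl http://localhost:8000/v1/audio/transcriptions --form file=@audio.wav
```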
--- Taskfile.yaml
+++ Taskfile.yaml
@@ -11,19 +11,7 @@
       - pytest -o log_cli=true -o log_cli_level=DEBUG {{.CLI_ARGS}}
     sources:
       - src/**/*.py
-  build:
-    cmds:
-      - docker compose build
-    sources:
-      - Dockerfile.*
-      - src/**/*.py
   create-multi-arch-builder: docker buildx create --name main --driver=docker-container
-  docker-build:
-    cmds:
-      - docker compose build --builder main {{.CLI_ARGS}}
-    sources:
-      - Dockerfile.*
-      - src/faster_whisper_server/*.py
   cii:
     cmds:
       - act --rm --action-offline-mode --secret-file .secrets {{.CLI_ARGS}}
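With `build` and `docker-build` removed, the multi-arch flow reduces to the remaining task plus a direct Compose invocation. A sketch, assuming a Compose version that supports `--builder`:

```bash
# one-time: create the docker-container builder the removed task pointed at
task create-multi-arch-builder
# then build against it directly
docker compose --file compose.cpu.yaml build --builder main
```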
+++ compose.cpu.yaml
@@ -0,0 +1,17 @@
+# include:
+#   - compose.observability.yaml
+services:
+  faster-whisper-server:
+    extends:
+      file: compose.yaml
+      service: faster-whisper-server
+    image: fedirz/faster-whisper-server:latest-cpu
+    build:
+      args:
+        BASE_IMAGE: ubuntu:24.04
+    environment:
+      - WHISPER__MODEL=Systran/faster-whisper-small
+    volumes:
+      - hugging_face_cache:/root/.cache/huggingface
+volumes:
+  hugging_face_cache:
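Since this file `extends` the shared service in `compose.yaml`, the effective configuration is the merge of both files; Compose can print the resolved result:

```bash
# render the fully merged service definition produced by extends
docker compose --file compose.cpu.yaml config
```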
+++ compose.cuda-cdi.yaml
@@ -0,0 +1,24 @@
+# include:
+#   - compose.observability.yaml
+# This file is for those who have the CDI Docker feature enabled
+# https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html
+# https://docs.docker.com/reference/cli/dockerd/#enable-cdi-devices
+services:
+  faster-whisper-server:
+    extends:
+      file: compose.cuda.yaml
+      service: faster-whisper-server
+    volumes:
+      - hugging_face_cache:/root/.cache/huggingface
+    deploy:
+      resources:
+        reservations:
+          # WARN: requires Docker Compose 2.24.2
+          # https://docs.docker.com/reference/compose-file/merge/#replace-value
+          devices: !override
+            - capabilities: ["gpu"]
+              driver: cdi
+              device_ids:
+                - nvidia.com/gpu=all
+volumes:
+  hugging_face_cache:
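Per the dockerd documentation linked above, CDI must be enabled in the daemon before this override is usable. A sketch of the host-side steps (the `features.cdi` daemon flag is taken from Docker's docs; verify it against your Docker version):

```bash
# /etc/docker/daemon.json should contain: { "features": { "cdi": true } }
sudo systemctl restart docker
docker compose --file compose.cuda-cdi.yaml up --detach
```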
+++ compose.cuda.yaml
@@ -0,0 +1,22 @@
+# include:
+#   - compose.observability.yaml
+services:
+  faster-whisper-server:
+    extends:
+      file: compose.yaml
+      service: faster-whisper-server
+    image: fedirz/faster-whisper-server:latest-cuda
+    build:
+      args:
+        BASE_IMAGE: nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
+    environment:
+      - WHISPER__MODEL=Systran/faster-whisper-large-v3
+    volumes:
+      - hugging_face_cache:/root/.cache/huggingface
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - capabilities: ["gpu"]
+volumes:
+  hugging_face_cache:
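A quick way to check that the GPU reservation reaches the container, assuming the NVIDIA Container Toolkit is installed on the host (`nvidia-smi` is injected into the container by the NVIDIA runtime rather than shipped in the image):

```bash
docker compose --file compose.cuda.yaml up --detach
docker compose --file compose.cuda.yaml exec faster-whisper-server nvidia-smi
```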
--- observability-compose.yaml
+++ compose.observability.yaml
No changes
--- compose.yaml
+++ compose.yaml
@@ -1,11 +1,9 @@
 # TODO: https://docs.astral.sh/uv/guides/integration/docker/#configuring-watch-with-docker-compose
-include:
-  - observability-compose.yaml
 services:
-  faster-whisper-server-cuda:
-    image: fedirz/faster-whisper-server:latest-cuda
+  faster-whisper-server:
+    container_name: faster-whisper-server
     build:
-      dockerfile: Dockerfile.cuda
+      dockerfile: Dockerfile
       context: .
       platforms:
         - linux/amd64
@@ -13,39 +11,7 @@
     restart: unless-stopped
     ports:
       - 8000:8000
-    volumes:
-      - hugging_face_cache:/root/.cache/huggingface
     develop:
       watch:
         - path: faster_whisper_server
           action: rebuild
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - capabilities: ["gpu"]
-          # If you have CDI feature enabled use the following instead
-          # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html
-          # https://docs.docker.com/reference/cli/dockerd/#enable-cdi-devices
-          # - driver: cdi
-          #   device_ids:
-          #     - nvidia.com/gpu=all
-  faster-whisper-server-cpu:
-    image: fedirz/faster-whisper-server:latest-cpu
-    build:
-      dockerfile: Dockerfile.cpu
-      context: .
-      platforms:
-        - linux/amd64
-        - linux/arm64
-    restart: unless-stopped
-    ports:
-      - 8000:8000
-    volumes:
-      - hugging_face_cache:/root/.cache/huggingface
-    develop:
-      watch:
-        - path: faster_whisper_server
-          action: rebuild
-volumes:
-  hugging_face_cache:
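The retained `develop.watch` section pairs with Compose's watch mode; a sketch of the resulting dev loop, assuming Docker Compose 2.22 or newer:

```bash
# rebuild the image whenever the watched path changes
docker compose up --watch
```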