Fedir Zadniprovskyi 2024-12-22
feat: reorganize docker files
@5ad8603166d89907f7737e2f81f1a45a04d7c630
.github/workflows/docker-build-and-push.yaml
--- .github/workflows/docker-build-and-push.yaml
+++ .github/workflows/docker-build-and-push.yaml
@@ -13,11 +13,12 @@
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        dockerfile: [Dockerfile.cuda, Dockerfile.cpu]
+        # https://hub.docker.com/r/nvidia/cuda/tags
+        base-image: ['nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04', 'ubuntu:24.04']
         include:
-          - dockerfile: Dockerfile.cuda
+          - base-image: nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
             tag-suffix: -cuda
-          - dockerfile: Dockerfile.cpu
+          - base-image: ubuntu:24.04
             tag-suffix: -cpu
     steps:
       - uses: actions/checkout@v4
@@ -45,7 +46,9 @@
         uses: docker/build-push-action@v6
         with:
           context: .
-          file: ${{ matrix.dockerfile }}
+          file: Dockerfile
+          build-args: |
+            BASE_IMAGE=${{ matrix.base-image }}
           push: true
           platforms: linux/amd64,linux/arm64
           tags: ${{ steps.meta.outputs.tags }}
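For local verification, this matrix entry can be approximated with a single buildx invocation (the tag below is illustrative; multi-platform builds need a docker-container builder, such as the one the Taskfile's create-multi-arch-builder task creates):

```bash
docker buildx build \
  --platform linux/amd64,linux/arm64 \
  --build-arg BASE_IMAGE=ubuntu:24.04 \
  --tag fedirz/faster-whisper-server:latest-cpu \
  .
```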
 
Dockerfile (added)
+++ Dockerfile
@@ -0,0 +1,28 @@
+ARG BASE_IMAGE=nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
+FROM ${BASE_IMAGE}
+LABEL org.opencontainers.image.source="https://github.com/fedirz/faster-whisper-server"
+# `ffmpeg` is installed because without it `gradio` won't work with mp3 files (and possibly other formats)
+# hadolint ignore=DL3008
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ffmpeg python3.12 && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+USER ubuntu
+ENV HOME=/home/ubuntu \
+    PATH=/home/ubuntu/.local/bin:$PATH
+WORKDIR $HOME/faster-whisper-server
+COPY --chown=ubuntu --from=ghcr.io/astral-sh/uv:0.5.11 /uv /bin/uv
+# https://docs.astral.sh/uv/guides/integration/docker/#intermediate-layers
+# https://docs.astral.sh/uv/guides/integration/docker/#compiling-bytecode
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,source=uv.lock,target=uv.lock \
+    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+    uv sync --frozen --compile-bytecode --no-install-project
+COPY --chown=ubuntu ./src ./pyproject.toml ./uv.lock ./
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --frozen --compile-bytecode --extra ui --extra opentelemetry
+ENV WHISPER__MODEL=Systran/faster-whisper-large-v3
+ENV UVICORN_HOST=0.0.0.0
+ENV UVICORN_PORT=8000
+EXPOSE 8000
+CMD ["uv", "run", "opentelemetry-instrument", "uvicorn", "--factory", "faster_whisper_server.main:create_app"]
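Because the base image is now an ARG defaulting to the CUDA image, both variants build from this single Dockerfile. A minimal local sketch (tags are illustrative; the cache/bind mounts in the RUN steps require BuildKit, which is the default in current Docker):

```bash
# CUDA variant: BASE_IMAGE defaults to the CUDA image, so no build-arg is needed
docker build --tag faster-whisper-server:cuda .
# CPU variant: override the base image
docker build --build-arg BASE_IMAGE=ubuntu:24.04 --tag faster-whisper-server:cpu .
```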
 
Dockerfile.cpu (deleted)
--- Dockerfile.cpu
@@ -1,22 +0,0 @@
-FROM ubuntu:24.04
-LABEL org.opencontainers.image.source="https://github.com/fedirz/faster-whisper-server"
-# `ffmpeg` is installed because without it `gradio` won't work with mp3(possible others as well) files
-# hadolint ignore=DL3008
-RUN apt-get update && \
-  DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ffmpeg python3.12 && \
-  apt-get clean && \
-  rm -rf /var/lib/apt/lists/*
-COPY --from=ghcr.io/astral-sh/uv:0.5.11 /uv /bin/uv
-WORKDIR /root/faster-whisper-server
-# https://docs.astral.sh/uv/guides/integration/docker/#intermediate-layers
-RUN --mount=type=cache,target=/root/.cache/uv \
-  --mount=type=bind,source=uv.lock,target=uv.lock \
-  --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
-  uv sync --frozen --no-install-project
-COPY ./src ./pyproject.toml ./uv.lock ./
-RUN --mount=type=cache,target=/root/.cache/uv \
-  uv sync --frozen --extra ui
-ENV WHISPER__MODEL=Systran/faster-whisper-small
-ENV UVICORN_HOST=0.0.0.0
-ENV UVICORN_PORT=8000
-CMD ["uv", "run", "uvicorn", "--factory", "faster_whisper_server.main:create_app"]
 
Dockerfile.cuda (deleted)
--- Dockerfile.cuda
@@ -1,22 +0,0 @@
-FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04
-LABEL org.opencontainers.image.source="https://github.com/fedirz/faster-whisper-server"
-# `ffmpeg` is installed because without it `gradio` won't work with mp3(possible others as well) files
-# hadolint ignore=DL3008
-RUN apt-get update && \
-  DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends ffmpeg python3.12 && \
-  apt-get clean && \
-  rm -rf /var/lib/apt/lists/*
-COPY --from=ghcr.io/astral-sh/uv:0.5.11 /uv /bin/uv
-WORKDIR /root/faster-whisper-server
-# https://docs.astral.sh/uv/guides/integration/docker/#intermediate-layers
-RUN --mount=type=cache,target=/root/.cache/uv \
-  --mount=type=bind,source=uv.lock,target=uv.lock \
-  --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
-  uv sync --frozen --no-install-project
-COPY ./src ./pyproject.toml ./uv.lock ./
-RUN --mount=type=cache,target=/root/.cache/uv \
-  uv sync --frozen --extra ui --extra opentelemetry
-ENV WHISPER__MODEL=Systran/faster-whisper-large-v3
-ENV UVICORN_HOST=0.0.0.0
-ENV UVICORN_PORT=8000
-CMD ["uv", "run", "opentelemetry-instrument", "uvicorn", "--factory", "faster_whisper_server.main:create_app"]
README.md
--- README.md
+++ README.md
@@ -25,18 +25,26 @@
 
 ![image](https://github.com/fedirz/faster-whisper-server/assets/76551385/6d215c52-ded5-41d2-89a5-03a6fd113aa0)
 
+Using Docker Compose (Recommended)
+NOTE: I'm using newer Docker Compose features. If you are using an older version of Docker Compose, you may need to update.
+
+```bash
+curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.yaml
+
+# for GPU support
+curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.cuda.yaml
+docker compose --file compose.cuda.yaml up --detach
+# for CPU only (use this if you don't have a GPU, as the image is much smaller)
+curl --silent --remote-name https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.cpu.yaml
+docker compose --file compose.cpu.yaml up --detach
+```
+
 Using Docker
 ```bash
-docker run --gpus=all --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface fedirz/faster-whisper-server:latest-cuda
-# or
-docker run --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface fedirz/faster-whisper-server:latest-cpu
-```
-Using Docker Compose
-```bash
-curl -sO https://raw.githubusercontent.com/fedirz/faster-whisper-server/master/compose.yaml
-docker compose up --detach faster-whisper-server-cuda
-# or
-docker compose up --detach faster-whisper-server-cpu
+# for GPU support
+docker run --gpus=all --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --detach fedirz/faster-whisper-server:latest-cuda
+# for CPU only (use this if you don't have a GPU, as the image is much smaller)
+docker run --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --env WHISPER__MODEL=Systran/faster-whisper-small --detach fedirz/faster-whisper-server:latest-cpu
 ```
 
 Using Kubernetes: [tutorial](https://substratus.ai/blog/deploying-faster-whisper-on-k8s)
Taskfile.yaml
--- Taskfile.yaml
+++ Taskfile.yaml
@@ -11,19 +11,7 @@
       - pytest -o log_cli=true -o log_cli_level=DEBUG {{.CLI_ARGS}}
     sources:
       - src/**/*.py
-  build:
-    cmds:
-      - docker compose build
-    sources:
-      - Dockerfile.*
-      - src/**/*.py
   create-multi-arch-builder: docker buildx create --name main --driver=docker-container
-  docker-build:
-    cmds:
-      - docker compose build --builder main {{.CLI_ARGS}}
-    sources:
-      - Dockerfile.*
-      - src/faster_whisper_server/*.py
   cii:
     cmds:
       - act --rm --action-offline-mode --secret-file .secrets {{.CLI_ARGS}}
 
compose.cpu.yaml (added)
+++ compose.cpu.yaml
@@ -0,0 +1,17 @@
+# include:
+#   - compose.observability.yaml
+services:
+  faster-whisper-server:
+    extends:
+      file: compose.yaml
+      service: faster-whisper-server
+    image: fedirz/faster-whisper-server:latest-cpu
+    build:
+      args:
+        BASE_IMAGE: ubuntu:24.04
+    environment:
+      - WHISPER__MODEL=Systran/faster-whisper-small
+    volumes:
+      - hugging_face_cache:/root/.cache/huggingface
+volumes:
+  hugging_face_cache:
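Since this variant file resolves against compose.yaml via `extends`, the merged service definition can be inspected before starting anything:

```bash
# Print the fully resolved configuration (extends merged with compose.yaml)
docker compose --file compose.cpu.yaml config
# Then start the service
docker compose --file compose.cpu.yaml up --detach
```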
 
compose.cuda-cdi.yaml (added)
+++ compose.cuda-cdi.yaml
@@ -0,0 +1,24 @@
+# include:
+#   - compose.observability.yaml
+# This file is for those who have the CDI Docker feature enabled
+# https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html
+# https://docs.docker.com/reference/cli/dockerd/#enable-cdi-devices
+services:
+  faster-whisper-server:
+    extends:
+      file: compose.cuda.yaml
+      service: faster-whisper-server
+    volumes:
+      - hugging_face_cache:/root/.cache/huggingface
+    deploy:
+      resources:
+        reservations:
+          # WARN: requires Docker Compose 2.24.2
+          # https://docs.docker.com/reference/compose-file/merge/#replace-value
+          devices: !override
+            - capabilities: ["gpu"]
+              driver: cdi
+              device_ids:
+                - nvidia.com/gpu=all
+volumes:
+  hugging_face_cache:
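A rough way to sanity-check the CDI setup before using this file (assuming the NVIDIA Container Toolkit is installed; `nvidia-ctk` ships with it):

```bash
# List the CDI device specs the runtime can see (e.g. nvidia.com/gpu=all)
nvidia-ctk cdi list
# Confirm that !override replaced, rather than merged with, the device
# reservation inherited from compose.cuda.yaml
docker compose --file compose.cuda-cdi.yaml config
```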
 
compose.cuda.yaml (added)
+++ compose.cuda.yaml
@@ -0,0 +1,22 @@
+# include:
+#   - compose.observability.yaml
+services:
+  faster-whisper-server:
+    extends:
+      file: compose.yaml
+      service: faster-whisper-server
+    image: fedirz/faster-whisper-server:latest-cuda
+    build:
+      args:
+        BASE_IMAGE: nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
+    environment:
+      - WHISPER__MODEL=Systran/faster-whisper-large-v3
+    volumes:
+      - hugging_face_cache:/root/.cache/huggingface
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - capabilities: ["gpu"]
+volumes:
+  hugging_face_cache:
compose.observability.yaml (Renamed from observability-compose.yaml)
--- observability-compose.yaml
+++ compose.observability.yaml
No changes
compose.yaml
--- compose.yaml
+++ compose.yaml
@@ -1,11 +1,9 @@
 # TODO: https://docs.astral.sh/uv/guides/integration/docker/#configuring-watch-with-docker-compose
-include:
-  - observability-compose.yaml
 services:
-  faster-whisper-server-cuda:
-    image: fedirz/faster-whisper-server:latest-cuda
+  faster-whisper-server:
+    container_name: faster-whisper-server
     build:
-      dockerfile: Dockerfile.cuda
+      dockerfile: Dockerfile
       context: .
       platforms:
         - linux/amd64
@@ -13,39 +11,7 @@
     restart: unless-stopped
     ports:
       - 8000:8000
-    volumes:
-      - hugging_face_cache:/root/.cache/huggingface
     develop:
       watch:
         - path: faster_whisper_server
           action: rebuild
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - capabilities: ["gpu"]
-              # If you have CDI feature enabled use the following instead
-              # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/cdi-support.html
-              # https://docs.docker.com/reference/cli/dockerd/#enable-cdi-devices
-              # - driver: cdi
-              #   device_ids:
-              #   - nvidia.com/gpu=all
-  faster-whisper-server-cpu:
-    image: fedirz/faster-whisper-server:latest-cpu
-    build:
-      dockerfile: Dockerfile.cpu
-      context: .
-      platforms:
-        - linux/amd64
-        - linux/arm64
-    restart: unless-stopped
-    ports:
-      - 8000:8000
-    volumes:
-      - hugging_face_cache:/root/.cache/huggingface
-    develop:
-      watch:
-        - path: faster_whisper_server
-          action: rebuild
-volumes:
-  hugging_face_cache: