

chore: update volume names and mount points
@31537d182e79f198a6083d01495da7670d0b3e27
--- Dockerfile
+++ Dockerfile
... | ... | @@ -29,7 +29,7 @@ |
29 | 29 |
# Creating a directory for the cache to avoid the following error: |
30 | 30 |
# PermissionError: [Errno 13] Permission denied: '/home/ubuntu/.cache/huggingface/hub' |
31 | 31 |
# This error occurs because the volume is mounted as root and the `ubuntu` user doesn't have permission to write to it. Pre-creating the directory solves this issue. |
32 |
-RUN mkdir -p $HOME/.cache/huggingface |
|
32 |
+RUN mkdir -p $HOME/.cache/huggingface/hub |
|
33 | 33 |
ENV WHISPER__MODEL=Systran/faster-whisper-large-v3 |
34 | 34 |
ENV UVICORN_HOST=0.0.0.0 |
35 | 35 |
ENV UVICORN_PORT=8000 |
--- README.md
+++ README.md
... | ... | @@ -49,9 +49,9 @@ |
49 | 49 |
|
50 | 50 |
```bash |
51 | 51 |
# for GPU support |
52 |
-docker run --gpus=all --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --detach fedirz/faster-whisper-server:latest-cuda |
|
52 |
+docker run --gpus=all --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --detach fedirz/faster-whisper-server:latest-cuda |
|
53 | 53 |
# for CPU only (use this if you don't have a GPU, as the image is much smaller) |
54 |
-docker run --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --env WHISPER__MODEL=Systran/faster-whisper-small --detach fedirz/faster-whisper-server:latest-cpu |
|
54 |
+docker run --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=Systran/faster-whisper-small --detach fedirz/faster-whisper-server:latest-cpu |
|
55 | 55 |
``` |
56 | 56 |
|
57 | 57 |
### Using Kubernetes |
--- audio.wav
+++ audio.wav
Binary file is not shown |
--- compose.cpu.yaml
+++ compose.cpu.yaml
... | ... | @@ -12,6 +12,6 @@ |
12 | 12 |
environment: |
13 | 13 |
- WHISPER__MODEL=Systran/faster-whisper-small |
14 | 14 |
volumes: |
15 |
- - hugging_face_cache:/root/.cache/huggingface |
|
15 |
+ - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub |
|
16 | 16 |
volumes: |
17 |
- hugging_face_cache: |
|
17 |
+ hf-hub-cache: |
--- compose.cuda-cdi.yaml
+++ compose.cuda-cdi.yaml
... | ... | @@ -9,7 +9,7 @@ |
9 | 9 |
file: compose.cuda.yaml |
10 | 10 |
service: faster-whisper-server |
11 | 11 |
volumes: |
12 |
- - hugging_face_cache:/root/.cache/huggingface |
|
12 |
+ - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub |
|
13 | 13 |
deploy: |
14 | 14 |
resources: |
15 | 15 |
reservations: |
... | ... | @@ -21,4 +21,4 @@ |
21 | 21 |
device_ids: |
22 | 22 |
- nvidia.com/gpu=all |
23 | 23 |
volumes: |
24 |
- hugging_face_cache: |
|
24 |
+ hf-hub-cache: |
--- compose.cuda.yaml
+++ compose.cuda.yaml
... | ... | @@ -12,11 +12,11 @@ |
12 | 12 |
environment: |
13 | 13 |
- WHISPER__MODEL=Systran/faster-whisper-large-v3 |
14 | 14 |
volumes: |
15 |
- - hugging_face_cache:/root/.cache/huggingface |
|
15 |
+ - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub |
|
16 | 16 |
deploy: |
17 | 17 |
resources: |
18 | 18 |
reservations: |
19 | 19 |
devices: |
20 | 20 |
- capabilities: ["gpu"] |
21 | 21 |
volumes: |
22 |
- hugging_face_cache: |
|
22 |
+ hf-hub-cache: |
--- docs/installation.md
+++ docs/installation.md
... | ... | @@ -13,14 +13,14 @@ |
13 | 13 |
ports: |
14 | 14 |
- 8000:8000 |
15 | 15 |
volumes: |
16 |
- - hugging_face_cache:/root/.cache/huggingface |
|
16 |
+ - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub |
|
17 | 17 |
deploy: |
18 | 18 |
resources: |
19 | 19 |
reservations: |
20 | 20 |
devices: |
21 | 21 |
- capabilities: ["gpu"] |
22 | 22 |
volumes: |
23 |
- hugging_face_cache: |
|
23 |
+ hf-hub-cache: |
|
24 | 24 |
``` |
25 | 25 |
|
26 | 26 |
=== "CUDA (with CDI feature enabled)" |
... | ... | @@ -35,7 +35,7 @@ |
35 | 35 |
ports: |
36 | 36 |
- 8000:8000 |
37 | 37 |
volumes: |
38 |
- - hugging_face_cache:/root/.cache/huggingface |
|
38 |
+ - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub |
|
39 | 39 |
deploy: |
40 | 40 |
resources: |
41 | 41 |
reservations: |
... | ... | @@ -46,7 +46,7 @@ |
46 | 46 |
device_ids: |
47 | 47 |
- nvidia.com/gpu=all |
48 | 48 |
volumes: |
49 |
- hugging_face_cache: |
|
49 |
+ hf-hub-cache: |
|
50 | 50 |
``` |
51 | 51 |
|
52 | 52 |
=== "CPU" |
... | ... | @@ -60,9 +60,9 @@ |
60 | 60 |
ports: |
61 | 61 |
- 8000:8000 |
62 | 62 |
volumes: |
63 |
- - hugging_face_cache:/root/.cache/huggingface |
|
63 |
+ - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub |
|
64 | 64 |
volumes: |
65 |
- hugging_face_cache: |
|
65 |
+ hf-hub-cache: |
|
66 | 66 |
``` |
67 | 67 |
|
68 | 68 |
## Docker |
... | ... | @@ -70,19 +70,19 @@ |
70 | 70 |
=== "CUDA" |
71 | 71 |
|
72 | 72 |
```bash |
73 |
- docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume hugging_face_cache:/root/.cache/huggingface --gpus=all fedirz/faster-whisper-server:latest-cuda |
|
73 |
+ docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --gpus=all fedirz/faster-whisper-server:latest-cuda |
|
74 | 74 |
``` |
75 | 75 |
|
76 | 76 |
=== "CUDA (with CDI feature enabled)" |
77 | 77 |
|
78 | 78 |
```bash |
79 |
- docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume hugging_face_cache:/root/.cache/huggingface --device=nvidia.com/gpu=all fedirz/faster-whisper-server:latest-cuda |
|
79 |
+ docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --device=nvidia.com/gpu=all fedirz/faster-whisper-server:latest-cuda |
|
80 | 80 |
``` |
81 | 81 |
|
82 | 82 |
=== "CPU" |
83 | 83 |
|
84 | 84 |
```bash |
85 |
- docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume hugging_face_cache:/root/.cache/huggingface fedirz/faster-whisper-server:latest-cpu |
|
85 |
+ docker run --rm --detach --publish 8000:8000 --name faster-whisper-server --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub fedirz/faster-whisper-server:latest-cpu |
|
86 | 86 |
``` |
87 | 87 |
|
88 | 88 |
## Kubernetes |
--- examples/live-audio/script.sh
+++ examples/live-audio/script.sh
... | ... | @@ -10,9 +10,9 @@ |
10 | 10 |
export WHISPER__MODEL=Systran/faster-distil-whisper-large-v3 # or Systran/faster-whisper-tiny.en if you are running on a CPU for a faster inference. |
11 | 11 |
|
12 | 12 |
# Ensure you have `faster-whisper-server` running. If this is your first time running it, expect to wait up to a minute for the model to be downloaded and loaded into memory. You can run `curl localhost:8000/health` to check if the server is ready, or watch the logs with `docker logs -f <container_id>`. |
13 |
-docker run --detach --gpus=all --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cuda |
|
13 |
+docker run --detach --gpus=all --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cuda |
|
14 | 14 |
# or you can run it on a CPU |
15 |
-# docker run --detach --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cpu |
|
15 |
+# docker run --detach --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cpu |
|
16 | 16 |
|
17 | 17 |
# `pv` is used to limit the rate at which the audio is streamed to the server. Audio is being streamed at a rate of 32 kB/s (16000 sample rate * 16-bit sample / 8 bits per byte = 32000 bytes per second). This emulates live audio input from a microphone: `ffmpeg -loglevel quiet -f alsa -i default -ac 1 -ar 16000 -f s16le -` |
18 | 18 |
# shellcheck disable=SC2002 |
--- examples/youtube/script.sh
+++ examples/youtube/script.sh
... | ... | @@ -6,9 +6,9 @@ |
6 | 6 |
export WHISPER__MODEL=Systran/faster-distil-whisper-large-v3 # or Systran/faster-whisper-tiny.en if you are running on a CPU for a faster inference. |
7 | 7 |
|
8 | 8 |
# Ensure you have `faster-whisper-server` running. If this is your first time running it, expect to wait up to a minute for the model to be downloaded and loaded into memory. You can run `curl localhost:8000/health` to check if the server is ready, or watch the logs with `docker logs -f <container_id>`. |
9 |
-docker run --detach --gpus=all --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cuda |
|
9 |
+docker run --detach --gpus=all --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cuda |
|
10 | 10 |
# or you can run it on a CPU |
11 |
-# docker run --detach --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cpu |
|
11 |
+# docker run --detach --publish 8000:8000 --volume hf-hub-cache:/home/ubuntu/.cache/huggingface/hub --env WHISPER__MODEL=$WHISPER__MODEL fedirz/faster-whisper-server:latest-cpu |
|
12 | 12 |
|
13 | 13 |
# Download the audio from a YouTube video. In this example I'm downloading "The Evolution of the Operating System" by the Asianometry YouTube channel. I highly recommend checking this channel out; the guy produces very high-quality content. If you don't have `youtube-dl`, you'll have to install it. https://github.com/ytdl-org/youtube-dl |
14 | 14 |
youtube-dl --extract-audio --audio-format mp3 -o the-evolution-of-the-operating-system.mp3 'https://www.youtube.com/watch?v=1lG7lFLXBIs' |
Add a comment
Delete comment
Once you delete this comment, you won't be able to recover it. Are you sure you want to delete this comment?