

docs: add live-transcription demo
@6f7c497afda10a3fea1b5590b4f7f3584acf57d4
--- .gitattributes
+++ .gitattributes
... | ... | @@ -1,1 +1,3 @@ |
1 | 1 |
/examples/youtube/the-evolution-of-the-operating-system.mp3 filter=lfs diff=lfs merge=lfs -text |
2 |
+/examples/live-audio/audio.pcm filter=lfs diff=lfs merge=lfs -text |
|
3 |
+/examples/live-audio/demo.mp4 filter=lfs diff=lfs merge=lfs -text |
+++ examples/live-audio/script.sh
... | ... | @@ -0,0 +1,19 @@ |
1 | +#!/usr/bin/env bash | |
2 | + | |
3 | +set -e | |
4 | + | |
5 | +# The audio file was copied from the `youtube` example and converted to a raw, single channel, 16000 sample rate, 16-bit little-endian PCM audio file. | |
6 | +# cp ../youtube/the-evolution-of-the-operating-system.mp3 ./audio.mp3 | |
7 | +# ffmpeg -y -hide_banner -loglevel quiet -i audio.mp3 -ac 1 -ar 16000 -f s16le -acodec pcm_s16le audio.pcm | |
8 | +# rm -f audio.mp3 | |
9 | + | |
10 | +export WHISPER_MODEL=distil-large-v3 # or tiny.en if you are running on a CPU, for faster inference. | |
11 | + | |
12 | +# Ensure you have `faster-whisper-server` running. If this is your first time running it, expect to wait up to a minute for the model to be downloaded and loaded into memory. You can run `curl localhost:8000/health` to check if the server is ready or watch the logs with `docker logs -f <container_id>`. | |
13 | +docker run --detach --gpus=all --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --env WHISPER_MODEL=$WHISPER_MODEL fedirz/faster-whisper-server:0.1-cuda | |
14 | +# or you can run it on a CPU | |
15 | +# docker run --detach --publish 8000:8000 --volume ~/.cache/huggingface:/root/.cache/huggingface --env WHISPER_MODEL=$WHISPER_MODEL fedirz/faster-whisper-server:0.1-cpu | |
16 | + | |
17 | +# `pv` is used to limit the rate at which the audio is streamed to the server. Audio is being streamed at a rate of 32 kB/s (16000 samples per second * 16 bits per sample / 8 bits per byte = 32000 bytes per second). This emulates live audio input from a microphone: `ffmpeg -loglevel quiet -f alsa -i default -ac 1 -ar 16000 -f s16le` | |
18 | +# shellcheck disable=SC2002 | |
19 | +cat audio.pcm | pv -qL 32000 | websocat --no-close --binary ws://localhost:8000/v1/audio/transcriptions?language=en |
Add a comment
Delete comment
Once you delete this comment, you won't be able to recover it. Are you sure you want to delete this comment?