

docs: add js example
Addresses #26
@8202b524ebcf973a779622e211f48cb293c5b885
+++ examples/javascript/index.js
... | ... | @@ -0,0 +1,159 @@ |
1 | +/** | |
2 | + * Example provided by https://github.com/Gan-Xing in https://github.com/fedirz/faster-whisper-server/issues/26 | |
3 | + */ | |
4 | +import fs from 'fs'; | |
5 | +import WebSocket from 'ws'; | |
6 | +import fetch from 'node-fetch'; | |
7 | +import FormData from 'form-data'; | |
8 | +import path from 'path'; | |
9 | +import ffmpeg from 'fluent-ffmpeg'; | |
10 | +import dotenv from 'dotenv'; | |
11 | + | |
dotenv.config();

// FFMPEG_PATH lets a deployment point at a custom ffmpeg binary (e.g. a v7+ build
// in Docker); falls back to the usual system install location.
const ffmpegPath = process.env.FFMPEG_PATH || '/usr/bin/ffmpeg';
ffmpeg.setFfmpegPath(ffmpegPath);
16 | + | |
/**
 * Transcribe an audio file using the HTTP endpoint.
 * Supported file types include wav, mp3, webm, and other types supported by
 * the OpenAI API (these three have been tested).
 *
 * @param {string} filePath - Path to the audio file
 * @param {string} model - Model name
 * @param {string} language - Language code
 * @param {string} responseFormat - Response format
 * @param {string} temperature - Temperature setting
 * @returns {Promise<object>} Parsed JSON transcription response
 * @throws {Error} If the server responds with a non-2xx status
 */
async function transcribeFile(filePath, model, language, responseFormat, temperature) {
  const formData = new FormData();
  formData.append('file', fs.createReadStream(filePath));
  formData.append('model', model);
  formData.append('language', language);
  formData.append('response_format', responseFormat);
  formData.append('temperature', temperature);

  const response = await fetch(`${process.env.TRANSCRIPTION_API_BASE_URL}/v1/audio/transcriptions`, {
    method: 'POST',
    body: formData,
  });

  // Fail loudly on HTTP errors instead of JSON-parsing an error page.
  if (!response.ok) {
    throw new Error(`Transcription request failed: ${response.status} ${response.statusText}`);
  }

  const transcription = await response.json();
  console.log('Transcription Response:', transcription);
  return transcription;
}
44 | + | |
/**
 * Translate an audio file using the HTTP endpoint.
 * Only English is supported as the translation target.
 * (See https://github.com/fedirz/faster-whisper-server/issues/26 for the
 * original discussion about custom translation backends.)
 *
 * @param {string} filePath - Path to the audio file
 * @param {string} model - Model name
 * @param {string} responseFormat - Response format
 * @param {string} temperature - Temperature setting
 * @returns {Promise<object>} Parsed JSON translation response
 * @throws {Error} If the server responds with a non-2xx status
 */
async function translateFile(filePath, model, responseFormat, temperature) {
  const formData = new FormData();
  formData.append('file', fs.createReadStream(filePath));
  formData.append('model', model);
  formData.append('response_format', responseFormat);
  formData.append('temperature', temperature);

  const response = await fetch(`${process.env.TRANSLATION_API_BASE_URL}/v1/audio/translations`, {
    method: 'POST',
    body: formData,
  });

  // Fail loudly on HTTP errors instead of JSON-parsing an error page.
  if (!response.ok) {
    throw new Error(`Translation request failed: ${response.status} ${response.statusText}`);
  }

  const translation = await response.json();
  console.log('Translation Response:', translation);
  return translation;
}
72 | + | |
/**
 * Send audio data over WebSocket for transcription.
 * Currently, the supported file type for transcription is PCM.
 *
 * The returned promise settles when the WebSocket connection closes (or
 * rejects on a socket error), so callers that `await` it actually wait for
 * the transcription exchange to finish.
 *
 * @param {string} filePath - Path to the audio file (PCM)
 * @param {string} model - Model name
 * @param {string} language - Language code
 * @param {string} responseFormat - Response format
 * @param {string} temperature - Temperature setting
 * @returns {Promise<void>} Resolves once the connection is closed
 */
async function sendAudioOverWebSocket(filePath, model, language, responseFormat, temperature) {
  // Derive the WebSocket endpoint from the same env-configured base URL the
  // HTTP helpers use (http(s):// -> ws(s)://) instead of a hard-coded LAN IP.
  const base = (process.env.TRANSCRIPTION_API_BASE_URL || 'http://localhost:8000').replace(/^http/, 'ws');
  const wsUrl = new URL('/v1/audio/transcriptions', base);
  wsUrl.searchParams.set('model', model);
  wsUrl.searchParams.set('language', language);
  wsUrl.searchParams.set('response_format', responseFormat);
  wsUrl.searchParams.set('temperature', temperature);

  return new Promise((resolve, reject) => {
    const ws = new WebSocket(wsUrl.toString());

    ws.on('open', () => {
      const audioBuffer = fs.readFileSync(filePath);
      ws.send(audioBuffer);
    });

    ws.on('message', (message) => {
      const response = JSON.parse(message);
      console.log('WebSocket Response:', response);
    });

    ws.on('close', () => {
      console.log('WebSocket connection closed');
      resolve();
    });

    ws.on('error', (error) => {
      console.error('WebSocket error:', error);
      reject(error);
    });
  });
}
106 | + | |
/**
 * Convert an audio file to raw PCM (16 kHz, mono, signed 16-bit little-endian),
 * which is the format the WebSocket endpoint accepts.
 *
 * @param {string} filePath - Path to the source audio file
 * @returns {Promise<string>} Path to the converted .pcm file
 * @throws {Error} If ffmpeg fails to convert the file
 */
async function convertToPcm(filePath) {
  // Swap the extension for '.pcm'. Guard the no-extension case: extname()
  // returns '' there, and String.replace('', ...) would PREPEND '.pcm'.
  const ext = path.extname(filePath);
  const pcmFilePath = ext
    ? filePath.slice(0, -ext.length) + '.pcm'
    : `${filePath}.pcm`;

  // Resolve with the output path once ffmpeg has finished writing it.
  return new Promise((resolve, reject) => {
    ffmpeg(filePath)
      .audioChannels(1)
      .audioFrequency(16000)
      .audioCodec('pcm_s16le')
      .toFormat('s16le')
      .on('end', () => {
        console.log(`Audio file successfully converted to PCM: ${pcmFilePath}`);
        resolve(pcmFilePath);
      })
      .on('error', (error) => {
        console.error(`Error converting audio to PCM: ${error.message}`);
        reject(error);
      })
      .save(pcmFilePath);
  });
}
135 | + | |
/**
 * Demo driver: convert a source recording to PCM, then exercise the HTTP
 * transcription, HTTP translation, and WebSocket transcription endpoints.
 */
async function main() {
  const model = 'Systran/faster-whisper-large-v3';
  const language = 'en';
  const responseFormat = 'json';
  const temperature = '0';
  const filePath = './path/to/your/audio.webm'; // Replace with the actual file path

  // The WebSocket endpoint expects PCM, so convert once up front.
  const pcmFilePath = await convertToPcm(filePath);

  // Exercise each endpoint in turn with the converted file.
  await transcribeFile(pcmFilePath, model, language, responseFormat, temperature);
  await translateFile(pcmFilePath, model, responseFormat, temperature);
  await sendAudioOverWebSocket(pcmFilePath, model, language, responseFormat, temperature);
}

// Make sure to use ffmpeg version 7 or above. The default apt-get install only installs version 4.x. Also, Ubuntu 22.04 or above is required to support version 7.x.
main().catch(console.error);

// Project URL: https://github.com/Gan-Xing/whisper
Add a comment
Delete comment
Once you delete this comment, you won't be able to recover it. Are you sure you want to delete this comment?