

docs: add js example
Addresses #26
@8202b524ebcf973a779622e211f48cb293c5b885
+++ examples/javascript/index.js
... | ... | @@ -0,0 +1,159 @@ |
1 | +/** | |
2 | + * Example provided by https://github.com/Gan-Xing in https://github.com/fedirz/faster-whisper-server/issues/26 | |
3 | + */ | |
4 | +import fs from 'fs'; | |
5 | +import WebSocket from 'ws'; | |
6 | +import fetch from 'node-fetch'; | |
7 | +import FormData from 'form-data'; | |
8 | +import path from 'path'; | |
9 | +import ffmpeg from 'fluent-ffmpeg'; | |
10 | +import dotenv from 'dotenv'; | |
11 | + | |
dotenv.config();

// FFMPEG_PATH lets a deployment point at a custom ffmpeg binary (e.g. a v7+ build
// in Docker); falls back to the usual system install location.
const ffmpegPath = process.env.FFMPEG_PATH || '/usr/bin/ffmpeg';
ffmpeg.setFfmpegPath(ffmpegPath);
16 | + | |
/**
 * Transcribe an audio file using the HTTP endpoint.
 * Supported file types include wav, mp3, webm, and other types supported by
 * the OpenAI API (these three have been tested).
 *
 * @param {string} filePath - Path to the audio file
 * @param {string} model - Model name
 * @param {string} language - Language code
 * @param {string} responseFormat - Response format
 * @param {string} temperature - Temperature setting
 * @returns {Promise<object>} Parsed JSON transcription response
 * @throws {Error} If the server responds with a non-2xx status
 */
async function transcribeFile(filePath, model, language, responseFormat, temperature) {
  const formData = new FormData();
  formData.append('file', fs.createReadStream(filePath));
  formData.append('model', model);
  formData.append('language', language);
  formData.append('response_format', responseFormat);
  formData.append('temperature', temperature);

  const response = await fetch(`${process.env.TRANSCRIPTION_API_BASE_URL}/v1/audio/transcriptions`, {
    method: 'POST',
    body: formData,
  });

  // Fail loudly on HTTP errors instead of JSON-parsing an error page.
  if (!response.ok) {
    throw new Error(`Transcription request failed: ${response.status} ${response.statusText}`);
  }

  const transcription = await response.json();
  console.log('Transcription Response:', transcription);
  return transcription;
}
44 | + | |
/**
 * Translate an audio file using the HTTP endpoint.
 * Only English is supported as the translation target.
 * (See https://github.com/fedirz/faster-whisper-server/issues/26 for the
 * original discussion about custom translation backends.)
 *
 * @param {string} filePath - Path to the audio file
 * @param {string} model - Model name
 * @param {string} responseFormat - Response format
 * @param {string} temperature - Temperature setting
 * @returns {Promise<object>} Parsed JSON translation response
 * @throws {Error} If the server responds with a non-2xx status
 */
async function translateFile(filePath, model, responseFormat, temperature) {
  const formData = new FormData();
  formData.append('file', fs.createReadStream(filePath));
  formData.append('model', model);
  formData.append('response_format', responseFormat);
  formData.append('temperature', temperature);

  const response = await fetch(`${process.env.TRANSLATION_API_BASE_URL}/v1/audio/translations`, {
    method: 'POST',
    body: formData,
  });

  // Fail loudly on HTTP errors instead of JSON-parsing an error page.
  if (!response.ok) {
    throw new Error(`Translation request failed: ${response.status} ${response.statusText}`);
  }

  const translation = await response.json();
  console.log('Translation Response:', translation);
  return translation;
}
72 | + | |
/**
 * Send audio data over WebSocket for transcription.
 * Currently, the supported file type for transcription is PCM.
 *
 * The returned promise settles when the WebSocket connection closes (or
 * rejects on a socket error), so callers that `await` it actually wait for
 * the transcription exchange to finish.
 *
 * @param {string} filePath - Path to the audio file (PCM)
 * @param {string} model - Model name
 * @param {string} language - Language code
 * @param {string} responseFormat - Response format
 * @param {string} temperature - Temperature setting
 * @returns {Promise<void>} Resolves once the connection is closed
 */
async function sendAudioOverWebSocket(filePath, model, language, responseFormat, temperature) {
  // Derive the WebSocket endpoint from the same env-configured base URL the
  // HTTP helpers use (http(s):// -> ws(s)://) instead of a hard-coded LAN IP.
  const base = (process.env.TRANSCRIPTION_API_BASE_URL || 'http://localhost:8000').replace(/^http/, 'ws');
  const wsUrl = new URL('/v1/audio/transcriptions', base);
  wsUrl.searchParams.set('model', model);
  wsUrl.searchParams.set('language', language);
  wsUrl.searchParams.set('response_format', responseFormat);
  wsUrl.searchParams.set('temperature', temperature);

  return new Promise((resolve, reject) => {
    const ws = new WebSocket(wsUrl.toString());

    ws.on('open', () => {
      const audioBuffer = fs.readFileSync(filePath);
      ws.send(audioBuffer);
    });

    ws.on('message', (message) => {
      const response = JSON.parse(message);
      console.log('WebSocket Response:', response);
    });

    ws.on('close', () => {
      console.log('WebSocket connection closed');
      resolve();
    });

    ws.on('error', (error) => {
      console.error('WebSocket error:', error);
      reject(error);
    });
  });
}
106 | + | |
/**
 * Convert an audio file to raw PCM (16 kHz, mono, signed 16-bit little-endian),
 * which is the format the WebSocket endpoint accepts.
 *
 * @param {string} filePath - Path to the source audio file
 * @returns {Promise<string>} Path to the converted .pcm file
 * @throws {Error} If ffmpeg fails to convert the file
 */
async function convertToPcm(filePath) {
  // Swap the extension for '.pcm'. Guard the no-extension case: extname()
  // returns '' there, and String.replace('', ...) would PREPEND '.pcm'.
  const ext = path.extname(filePath);
  const pcmFilePath = ext
    ? filePath.slice(0, -ext.length) + '.pcm'
    : `${filePath}.pcm`;

  // Resolve with the output path once ffmpeg has finished writing it.
  return new Promise((resolve, reject) => {
    ffmpeg(filePath)
      .audioChannels(1)
      .audioFrequency(16000)
      .audioCodec('pcm_s16le')
      .toFormat('s16le')
      .on('end', () => {
        console.log(`Audio file successfully converted to PCM: ${pcmFilePath}`);
        resolve(pcmFilePath);
      })
      .on('error', (error) => {
        console.error(`Error converting audio to PCM: ${error.message}`);
        reject(error);
      })
      .save(pcmFilePath);
  });
}
135 | + | |
/**
 * Demo driver: convert a source recording to PCM, then exercise the HTTP
 * transcription, HTTP translation, and WebSocket transcription endpoints.
 */
async function main() {
  const model = 'Systran/faster-whisper-large-v3';
  const language = 'en';
  const responseFormat = 'json';
  const temperature = '0';
  const filePath = './path/to/your/audio.webm'; // Replace with the actual file path

  // The WebSocket endpoint expects PCM, so convert once up front.
  const pcmFilePath = await convertToPcm(filePath);

  // Exercise each endpoint in turn with the converted file.
  await transcribeFile(pcmFilePath, model, language, responseFormat, temperature);
  await translateFile(pcmFilePath, model, responseFormat, temperature);
  await sendAudioOverWebSocket(pcmFilePath, model, language, responseFormat, temperature);
}

// Make sure to use ffmpeg version 7 or above. The default apt-get install only installs version 4.x. Also, Ubuntu 22.04 or above is required to support version 7.x.
main().catch(console.error);

// Project URL: https://github.com/Gan-Xing/whisper
Add a comment
Delete comment
Once you delete this comment, you won't be able to recover it. Are you sure you want to delete this comment?