• Y
  • List All
  • Feedback
    • This Project
    • All Projects
Profile Account settings Log out
  • Favorite
  • Project
  • All
Loading...
  • Log in
  • Sign up
yjyoon / whisper_client star
  • Project homeH
  • CodeC
  • IssueI
  • Pull requestP
  • Review R
  • MilestoneM
  • BoardB
  • Files
  • Commit
  • Branches
whisper_clientclient.html
Download as .zip file
File name
Commit message
Commit date
README.md
Hello Yona
01-17
client.html
Hello Yona
01-17
client_with_openAI.html
Hello Yona
01-17
get_microphone.py
Hello Yona
01-17
websocket_client.py
Hello Yona
01-17
yjyoon 01-17 9bb012f Hello Yona UNIX
Raw Open in browser Change history
<!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8"> <title>Real-Time Audio Transcription with VAD and Volume Meter</title> <style> body { font-family: Arial, sans-serif; margin: 40px; background-color: #f5f5f5; } h1 { text-align: center; } #controls { text-align: center; margin-bottom: 20px; } #recordBtn { padding: 15px 30px; font-size: 18px; border: none; border-radius: 5px; background-color: #28a745; /* Green */ color: white; cursor: pointer; transition: background-color 0.3s ease; } #recordBtn.recording { background-color: #dc3545; /* Red */ } #transcriptions { max-width: 800px; margin: 0 auto; padding: 20px; background-color: white; border-radius: 8px; box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1); height: 600px; overflow-y: auto; white-space: pre-wrap; font-size: 16px; } .transcription { margin-bottom: 10px; } .notice { color: #dc3545; /* Red */ font-style: italic; } /* Volume Meter Styles */ #volumeMeter { width: 300px; height: 30px; background-color: #e0e0e0; border-radius: 15px; overflow: hidden; margin: 20px auto; position: relative; } #volumeBar { height: 100%; width: 0%; background-color: #28a745; transition: width 0.1s ease, background-color 0.1s ease; } </style> </head> <body> <h1>Real-Time Audio Transcription with VAD and Volume Meter</h1> <div id="controls"> <button id="recordBtn">Start Recording</button> </div> <!-- Volume Meter --> <div id="volumeMeter"> <div id="volumeBar"></div> </div> <div id="transcriptions"></div> <!-- Include ONNX Runtime Web --> <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.14.0/dist/ort.js"></script> <!-- Include VAD-Web --> <script src="https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.22/dist/bundle.min.js"></script> <script> // Elements const recordBtn = document.getElementById('recordBtn'); const transcriptionsDiv = document.getElementById('transcriptions'); const volumeBar = document.getElementById('volumeBar'); // State Variables let isRecording = false; let vadInstance = null; // Renamed to avoid conflict let ws = null; let audioContext = null; let analyser = null; let microphoneStream = null; let dataArray = null; let animationId = null; let reconnectInterval = 3000; // 3 seconds let shouldReconnect = false; // Flag to control reconnection // Configuration const WS_ENDPOINT = "ws://takensofttesting.iptime.org:54127/v1/audio/transcriptions?language=ko"; // Ensure this is correct // Buffer to hold incoming data for JSON parsing let incomingBuffer = ''; // Utility Functions /** * Logs transcription text with colored words based on probability. * @param {Array} words - Array of word objects with 'word' and 'probability'. */ function logTranscription(words) { const transcriptionLine = document.createElement('div'); transcriptionLine.classList.add('transcription'); words.forEach(wordObj => { const span = document.createElement('span'); span.textContent = wordObj.word + ' '; // Add space after each word // Calculate hue: 0 (red) to 240 (blue) const hue = wordObj.probability * 240; span.style.color = `hsl(${hue}, 100%, 50%)`; transcriptionLine.appendChild(span); }); transcriptionsDiv.appendChild(transcriptionLine); transcriptionsDiv.scrollTop = transcriptionsDiv.scrollHeight; } /** * Logs notice messages (e.g., connection status, errors). * @param {string} text - The notice text to display. */ function logNotice(text) { const p = document.createElement('p'); p.classList.add('notice'); p.textContent = text; transcriptionsDiv.appendChild(p); transcriptionsDiv.scrollTop = transcriptionsDiv.scrollHeight; } /** * Converts Float32 audio data to Int16 PCM format. * @param {Float32Array} buffer - The audio buffer in Float32 format. * @returns {Int16Array} - The audio buffer in Int16 format. */ function convertFloat32ToInt16(buffer) { let l = buffer.length; const buf = new Int16Array(l); while (l--) { buf[l] = Math.min(1, buffer[l]) * 0x7FFF; } return buf; } /** * Extracts JSON objects from a concatenated string. * @param {string} buffer - The concatenated JSON string. * @returns {Array} - An array of parsed JSON objects. */ function extractJSONObjects(buffer) { const objects = []; let braceStack = 0; let inString = false; let escape = false; let lastSplit = 0; for (let i = 0; i < buffer.length; i++) { const char = buffer[i]; if (char === '"' && !escape) { inString = !inString; } if (!inString) { if (char === '{') { braceStack++; } else if (char === '}') { braceStack--; if (braceStack === 0) { const jsonString = buffer.slice(lastSplit, i + 1); try { const jsonObj = JSON.parse(jsonString); objects.push(jsonObj); } catch (e) { console.error('Failed to parse JSON:', e); } lastSplit = i + 1; } } } // Handle escape characters if (char === '\\' && !escape) { escape = true; } else { escape = false; } } // Return any remaining buffer that wasn't parsed incomingBuffer = buffer.slice(lastSplit); return objects; } // WebSocket Handlers /** * Sets up the WebSocket connection and defines event handlers. */ function setupWebSocket() { ws = new WebSocket(WS_ENDPOINT); ws.binaryType = 'arraybuffer'; ws.onopen = () => { console.log('WebSocket connection opened.'); logNotice("WebSocket connection established."); }; ws.onmessage = (event) => { let messageData = ''; if (typeof event.data === 'string') { messageData = event.data; } else if (event.data instanceof ArrayBuffer) { const decoder = new TextDecoder('utf-8'); messageData = decoder.decode(event.data); } else { console.warn('Unsupported message format:', event.data); return; } // Append incoming data to buffer incomingBuffer += messageData; // Extract JSON objects const jsonObjects = extractJSONObjects(incomingBuffer); // Process each JSON object jsonObjects.forEach(obj => { if (obj.task === "transcribe" && Array.isArray(obj.words)) { logTranscription(obj.words); } }); }; ws.onclose = (event) => { console.log('WebSocket connection closed:', event); logNotice("WebSocket connection closed."); ws = null; if (isRecording && shouldReconnect) { logNotice("Attempting to reconnect..."); setTimeout(() => { setupWebSocket(); }, reconnectInterval); } else if (isRecording) { logNotice("Transcription session ended."); stopRecording(true); // true indicates server-initiated stop } }; ws.onerror = (error) => { console.error('WebSocket error:', error); logNotice("WebSocket encountered an error."); }; } // Voice Activity Detection Setup /** * Initializes the Voice Activity Detector (VAD) using Silero VAD. */ async function initializeVAD(stream) { try { vadInstance = await vad.MicVAD.new({ stream: stream, // Pass the existing MediaStream to avoid multiple microphone accesses onSpeechStart: () => { console.log("Speech start detected"); logNotice("Speech detected..."); }, onSpeechEnd: (audio) => { console.log("Speech end detected"); logNotice("Sending speech segment to server..."); // Convert Float32Array to Int16Array const int16Audio = convertFloat32ToInt16(audio); // Send the audio buffer via WebSocket if (ws && ws.readyState === WebSocket.OPEN) { ws.send(int16Audio.buffer); } else { console.warn('WebSocket is not open. Cannot send audio.'); logNotice("WebSocket is not open. Audio segment not sent."); } } }); } catch (error) { console.error('Error initializing VAD:', error); logNotice("Error initializing Voice Activity Detection."); } } // Volume Meter Setup /** * Sets up the volume meter using the Web Audio API. */ async function setupVolumeMeter(stream) { try { // Initialize AudioContext audioContext = new (window.AudioContext || window.webkitAudioContext)(); // Create MediaStreamSource from the existing stream microphoneStream = audioContext.createMediaStreamSource(stream); // Create AnalyserNode analyser = audioContext.createAnalyser(); analyser.fftSize = 512; const bufferLength = analyser.frequencyBinCount; dataArray = new Uint8Array(bufferLength); // Connect microphone to analyser microphoneStream.connect(analyser); // Start visualizing visualize(); } catch (error) { console.error('Error setting up volume meter:', error); logNotice("Error setting up volume meter."); } } /** * Visualizes the volume level on the volume meter. */ function visualize() { const updateVolume = () => { analyser.getByteFrequencyData(dataArray); let sum = 0; for (let i = 0; i < dataArray.length; i++) { sum += dataArray[i]; } const average = sum / dataArray.length; const volume = average / 255; // Normalize to [0,1] // Update the volume bar width volumeBar.style.width = `${volume * 100}%`; // Change color based on volume level (green to red) const hue = (1 - volume) * 120; // 120 (green) to 0 (red) volumeBar.style.backgroundColor = `hsl(${hue}, 100%, 50%)`; animationId = requestAnimationFrame(updateVolume); }; updateVolume(); } /** * Stops the volume meter visualization. */ function stopVolumeMeter() { if (animationId) { cancelAnimationFrame(animationId); animationId = null; } if (volumeBar) { volumeBar.style.width = '0%'; volumeBar.style.backgroundColor = '#28a745'; // Reset to green } if (analyser) { analyser.disconnect(); analyser = null; } if (microphoneStream) { microphoneStream.disconnect(); microphoneStream = null; } if (audioContext) { audioContext.close(); audioContext = null; } } // Recording Control Functions /** * Starts the Voice Activity Detection, Volume Meter, and WebSocket connection. */ async function startRecording() { try { // Request microphone access once const stream = await navigator.mediaDevices.getUserMedia({ audio: true, video: false }); // Set up Volume Meter await setupVolumeMeter(stream); // Initialize VAD with the same stream await initializeVAD(stream); // Set up WebSocket shouldReconnect = true; // Enable reconnection attempts setupWebSocket(); // Start VAD if (vadInstance) { vadInstance.start(); } // Update UI isRecording = true; recordBtn.textContent = 'Stop Recording'; recordBtn.classList.add('recording'); logNotice("Recording started. Speak into your microphone."); } catch (error) { console.error('Error starting recording:', error); logNotice("Error starting recording. Please try again."); } } /** * Stops the Voice Activity Detection, Volume Meter, and cleans up resources. * @param {boolean} serverInitiated - Indicates if the stop was triggered by the server. */ function stopRecording(serverInitiated = false) { if (!isRecording) return; // Stop VAD if (vadInstance) { vadInstance.pause(); vadInstance = null; } // Stop Volume Meter stopVolumeMeter(); // Prevent reconnection if stopping manually if (!serverInitiated) { shouldReconnect = false; } // Close WebSocket if not server-initiated if (!serverInitiated && ws && ws.readyState === WebSocket.OPEN) { ws.send(JSON.stringify({ action: "terminate" })); logNotice("Termination signal sent to server."); } // Close WebSocket if (ws) { ws.close(); ws = null; } // Reset recording state isRecording = false; recordBtn.textContent = 'Start Recording'; recordBtn.classList.remove('recording'); logNotice("Recording stopped."); } // Button Event Listener /** * Toggles recording state when the record button is clicked. */ recordBtn.addEventListener('click', () => { if (!isRecording) { startRecording().catch(error => { console.error('Error starting recording:', error); logNotice("Error starting recording. Please try again."); }); } else { stopRecording(); } }); </script> </body> </html>

          
        
    
    
Copyright Yona authors & © NAVER Corp. & NAVER LABS Supported by NAVER CLOUD PLATFORM

or
Sign in with github login with Google Sign in with Google
Reset password | Sign up