
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Real-Time Audio Transcription with VAD and Volume Meter</title>
<style>
body {
font-family: Arial, sans-serif;
margin: 40px;
background-color: #f5f5f5;
}
h1 {
text-align: center;
}
#controls {
text-align: center;
margin-bottom: 20px;
}
#recordBtn {
padding: 15px 30px;
font-size: 18px;
border: none;
border-radius: 5px;
background-color: #28a745; /* Green */
color: white;
cursor: pointer;
transition: background-color 0.3s ease;
}
#recordBtn.recording {
background-color: #dc3545; /* Red */
}
#transcriptions {
max-width: 800px;
margin: 0 auto;
padding: 20px;
background-color: white;
border-radius: 8px;
box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
height: 600px;
overflow-y: auto;
white-space: pre-wrap;
font-size: 16px;
}
.transcription {
margin-bottom: 10px;
}
.notice {
color: #dc3545; /* Red */
font-style: italic;
}
/* Volume Meter Styles */
#volumeMeter {
width: 300px;
height: 30px;
background-color: #e0e0e0;
border-radius: 15px;
overflow: hidden;
margin: 20px auto;
position: relative;
}
#volumeBar {
height: 100%;
width: 0%;
background-color: #28a745;
transition: width 0.1s ease, background-color 0.1s ease;
}
</style>
</head>
<body>
<h1>Real-Time Audio Transcription with VAD and Volume Meter</h1>
<div id="controls">
<button id="recordBtn">Start Recording</button>
</div>
<!-- Volume Meter -->
<div id="volumeMeter">
<div id="volumeBar"></div>
</div>
<div id="transcriptions"></div>
<!-- Include ONNX Runtime Web -->
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.14.0/dist/ort.js"></script>
<!-- Include VAD-Web -->
<script src="https://cdn.jsdelivr.net/npm/@ricky0123/vad-web@0.0.22/dist/bundle.min.js"></script>
<script>
// Elements
const recordBtn = document.getElementById('recordBtn');
const transcriptionsDiv = document.getElementById('transcriptions');
const volumeBar = document.getElementById('volumeBar');
// State Variables
let isRecording = false;
let vadInstance = null; // MicVAD instance; named to avoid shadowing the global `vad` from vad-web
let ws = null;
let audioContext = null;
let analyser = null;
let microphoneStream = null;
let mediaStream = null; // Raw MediaStream from getUserMedia, kept so its tracks can be stopped on cleanup
let dataArray = null;
let animationId = null;
const reconnectInterval = 3000; // Delay before reconnection attempts (ms)
let shouldReconnect = false; // Flag to control reconnection
// Configuration
const WS_ENDPOINT = "ws://takensofttesting.iptime.org:54127/v1/audio/transcriptions?language=ko"; // Transcription server endpoint; adjust host/port for your deployment
// Buffer to hold incoming data for JSON parsing
let incomingBuffer = '';
// Utility Functions
/**
* Logs transcription text with colored words based on probability.
* @param {Array} words - Array of word objects with 'word' and 'probability'.
*/
function logTranscription(words) {
const transcriptionLine = document.createElement('div');
transcriptionLine.classList.add('transcription');
words.forEach(wordObj => {
const span = document.createElement('span');
span.textContent = wordObj.word + ' '; // Add space after each word
// Map probability to hue: 0 (red, low confidence) through 240 (blue, high confidence)
const hue = wordObj.probability * 240;
span.style.color = `hsl(${hue}, 100%, 50%)`;
transcriptionLine.appendChild(span);
});
transcriptionsDiv.appendChild(transcriptionLine);
transcriptionsDiv.scrollTop = transcriptionsDiv.scrollHeight;
}
/**
* Logs notice messages (e.g., connection status, errors).
* @param {string} text - The notice text to display.
*/
function logNotice(text) {
const p = document.createElement('p');
p.classList.add('notice');
p.textContent = text;
transcriptionsDiv.appendChild(p);
transcriptionsDiv.scrollTop = transcriptionsDiv.scrollHeight;
}
/**
* Converts Float32 audio data to Int16 PCM format.
* @param {Float32Array} buffer - The audio buffer in Float32 format.
* @returns {Int16Array} - The audio buffer in Int16 format.
*/
function convertFloat32ToInt16(buffer) {
let l = buffer.length;
const buf = new Int16Array(l);
while (l--) {
// Clamp to [-1, 1] before scaling so out-of-range samples don't overflow Int16
buf[l] = Math.max(-1, Math.min(1, buffer[l])) * 0x7FFF;
}
return buf;
}
/**
* Extracts complete JSON objects from a concatenated string.
* As a side effect, any trailing partial object is stored back into the
* global `incomingBuffer` so the next message can complete it.
* @param {string} buffer - The concatenated JSON string.
* @returns {Array} - An array of parsed JSON objects.
*/
function extractJSONObjects(buffer) {
const objects = [];
let braceStack = 0;
let inString = false;
let escape = false;
let lastSplit = 0;
for (let i = 0; i < buffer.length; i++) {
const char = buffer[i];
if (char === '"' && !escape) {
inString = !inString;
}
if (!inString) {
if (char === '{') {
braceStack++;
} else if (char === '}') {
braceStack--;
if (braceStack === 0) {
const jsonString = buffer.slice(lastSplit, i + 1);
try {
const jsonObj = JSON.parse(jsonString);
objects.push(jsonObj);
} catch (e) {
console.error('Failed to parse JSON:', e);
}
lastSplit = i + 1;
}
}
}
// Track backslash escapes so escaped quotes don't toggle the in-string state
if (char === '\\' && !escape) {
escape = true;
} else {
escape = false;
}
}
// Keep any trailing partial object in the global buffer until the next message completes it
incomingBuffer = buffer.slice(lastSplit);
return objects;
}
// WebSocket Handlers
/**
* Sets up the WebSocket connection and defines event handlers.
*/
function setupWebSocket() {
ws = new WebSocket(WS_ENDPOINT);
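// Receive binary frames as ArrayBuffer so onmessage can decode them as UTF-8 text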
ws.binaryType = 'arraybuffer';
ws.onopen = () => {
console.log('WebSocket connection opened.');
logNotice("WebSocket connection established.");
};
ws.onmessage = (event) => {
let messageData = '';
if (typeof event.data === 'string') {
messageData = event.data;
} else if (event.data instanceof ArrayBuffer) {
const decoder = new TextDecoder('utf-8');
messageData = decoder.decode(event.data);
} else {
console.warn('Unsupported message format:', event.data);
return;
}
// Append incoming data to buffer
incomingBuffer += messageData;
// Extract JSON objects
const jsonObjects = extractJSONObjects(incomingBuffer);
// Process each JSON object
jsonObjects.forEach(obj => {
if (obj.task === "transcribe" && Array.isArray(obj.words)) {
logTranscription(obj.words);
}
});
};
ws.onclose = (event) => {
console.log('WebSocket connection closed:', event);
logNotice("WebSocket connection closed.");
ws = null;
if (isRecording && shouldReconnect) {
logNotice("Attempting to reconnect...");
setTimeout(() => {
setupWebSocket();
}, reconnectInterval);
} else if (isRecording) {
logNotice("Transcription session ended.");
stopRecording(true); // true indicates server-initiated stop
}
};
ws.onerror = (error) => {
console.error('WebSocket error:', error);
logNotice("WebSocket encountered an error.");
};
}
// Voice Activity Detection Setup
/**
* Initializes the Voice Activity Detector (VAD) using Silero VAD.
*/
async function initializeVAD(stream) {
try {
vadInstance = await vad.MicVAD.new({
stream: stream, // Pass the existing MediaStream to avoid multiple microphone accesses
onSpeechStart: () => {
console.log("Speech start detected");
logNotice("Speech detected...");
},
onSpeechEnd: (audio) => {
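// vad-web delivers the finished speech segment as a Float32Array sampled at 16 kHz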
console.log("Speech end detected");
logNotice("Sending speech segment to server...");
// Convert Float32Array to Int16Array
const int16Audio = convertFloat32ToInt16(audio);
// Send the audio buffer via WebSocket
if (ws && ws.readyState === WebSocket.OPEN) {
ws.send(int16Audio.buffer);
} else {
console.warn('WebSocket is not open. Cannot send audio.');
logNotice("WebSocket is not open. Audio segment not sent.");
}
}
});
} catch (error) {
console.error('Error initializing VAD:', error);
logNotice("Error initializing Voice Activity Detection.");
}
}
// Volume Meter Setup
/**
* Sets up the volume meter using the Web Audio API.
*/
async function setupVolumeMeter(stream) {
try {
// Initialize AudioContext
audioContext = new (window.AudioContext || window.webkitAudioContext)();
// Create MediaStreamSource from the existing stream
microphoneStream = audioContext.createMediaStreamSource(stream);
// Create AnalyserNode
analyser = audioContext.createAnalyser();
analyser.fftSize = 512;
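// frequencyBinCount is fftSize / 2, so dataArray holds 256 byte-valued magnitude bins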
const bufferLength = analyser.frequencyBinCount;
dataArray = new Uint8Array(bufferLength);
// Connect microphone to analyser
microphoneStream.connect(analyser);
// Start visualizing
visualize();
} catch (error) {
console.error('Error setting up volume meter:', error);
logNotice("Error setting up volume meter.");
}
}
/**
* Visualizes the volume level on the volume meter.
*/
function visualize() {
const updateVolume = () => {
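// Average the byte frequency magnitudes as a rough loudness proxy (time-domain RMS would be more precise, but this is cheap and adequate for a meter)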
analyser.getByteFrequencyData(dataArray);
let sum = 0;
for (let i = 0; i < dataArray.length; i++) {
sum += dataArray[i];
}
const average = sum / dataArray.length;
const volume = average / 255; // Normalize to [0,1]
// Update the volume bar width
volumeBar.style.width = `${volume * 100}%`;
// Change color based on volume level (green to red)
const hue = (1 - volume) * 120; // 120 (green) to 0 (red)
volumeBar.style.backgroundColor = `hsl(${hue}, 100%, 50%)`;
animationId = requestAnimationFrame(updateVolume);
};
updateVolume();
}
/**
* Stops the volume meter visualization.
*/
function stopVolumeMeter() {
if (animationId) {
cancelAnimationFrame(animationId);
animationId = null;
}
if (volumeBar) {
volumeBar.style.width = '0%';
volumeBar.style.backgroundColor = '#28a745'; // Reset to green
}
if (analyser) {
analyser.disconnect();
analyser = null;
}
if (microphoneStream) {
microphoneStream.disconnect();
microphoneStream = null;
}
if (audioContext) {
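// close() returns a Promise, but cleanup here doesn't need to await it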
audioContext.close();
audioContext = null;
}
}
// Recording Control Functions
/**
* Starts the Voice Activity Detection, Volume Meter, and WebSocket connection.
*/
async function startRecording() {
try {
// Request microphone access once and keep a reference so it can be released later
const stream = await navigator.mediaDevices.getUserMedia({ audio: true, video: false });
mediaStream = stream;
// Set up Volume Meter
await setupVolumeMeter(stream);
// Initialize VAD with the same stream
await initializeVAD(stream);
// Set up WebSocket
shouldReconnect = true; // Enable reconnection attempts
setupWebSocket();
// Start VAD
if (vadInstance) {
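// From here, the onSpeechStart/onSpeechEnd callbacks drive the transcription pipeline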
vadInstance.start();
}
// Update UI
isRecording = true;
recordBtn.textContent = 'Stop Recording';
recordBtn.classList.add('recording');
logNotice("Recording started. Speak into your microphone.");
} catch (error) {
console.error('Error starting recording:', error);
logNotice("Error starting recording. Please try again.");
}
}
/**
* Stops the Voice Activity Detection, Volume Meter, and cleans up resources.
* @param {boolean} serverInitiated - Indicates if the stop was triggered by the server.
*/
function stopRecording(serverInitiated = false) {
if (!isRecording) return;
// Stop VAD
if (vadInstance) {
vadInstance.pause();
vadInstance = null;
}
// Stop Volume Meter
stopVolumeMeter();
// Release the microphone so the browser's recording indicator turns off
if (mediaStream) {
mediaStream.getTracks().forEach(track => track.stop());
mediaStream = null;
}
// Prevent reconnection if stopping manually
if (!serverInitiated) {
shouldReconnect = false;
}
// Notify the server of a manual stop before closing
if (!serverInitiated && ws && ws.readyState === WebSocket.OPEN) {
ws.send(JSON.stringify({ action: "terminate" }));
logNotice("Termination signal sent to server.");
}
// Close WebSocket
if (ws) {
ws.close();
ws = null;
}
// Reset recording state
isRecording = false;
recordBtn.textContent = 'Start Recording';
recordBtn.classList.remove('recording');
logNotice("Recording stopped.");
}
// Button Event Listener
/**
* Toggles recording state when the record button is clicked.
*/
recordBtn.addEventListener('click', () => {
if (!isRecording) {
// startRecording catches and reports its own errors, so no extra .catch is needed here
startRecording();
} else {
stopRecording();
}
});
</script>
</body>
</html>