This guide provides a complete working example of how to use our WebSocket API for real-time audio streaming and transcription.

Sample Project

Below is a complete sample project (an HTML page plus an AudioWorklet processor script) that demonstrates:
  • WebSocket connection and authentication
  • Microphone access and audio processing
  • Real-time audio streaming
  • Handling transcription responses
You can save the following two blocks of code as HTML and JavaScript files respectively and serve them from a local server to test the example directly in your browser:
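
Any static file server will do. As a minimal sketch, assuming Node.js is installed, a server like the one below works; the port 8001 matches the audioProcessorUrl hardcoded in the sample:

serve.js

const http = require('http');
const fs = require('fs');
const path = require('path');

// Map file extensions to MIME types so the browser loads the worklet as JavaScript
const MIME_TYPES = { '.html': 'text/html', '.js': 'text/javascript' };

http.createServer((req, res) => {
    // Serve the sample page for the root path
    const fileName = req.url === '/' ? '/webSocketClient.html' : req.url;
    const filePath = path.join(__dirname, fileName);
    fs.readFile(filePath, (err, data) => {
        if (err) {
            res.writeHead(404);
            res.end('Not found');
            return;
        }
        res.writeHead(200, { 'Content-Type': MIME_TYPES[path.extname(filePath)] || 'application/octet-stream' });
        res.end(data);
    });
}).listen(8001, () => console.log('Serving on http://localhost:8001'));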

webSocketClient.html

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>WebSocket Audio Stream</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
        }
        button {
            padding: 12px 24px;
            font-size: 16px;
            margin: 10px;
            cursor: pointer;
        }
        #status {
            margin: 20px 0;
            color: #666;
        }
        #transcript {
            margin: 20px 0;
            border: 1px solid #ccc;
            padding: 10px;
            background-color: #f9f9f9;
        }
    </style>
</head>
<body>
    <h1>Audio Streaming via WebSocket</h1>
    <button id="startButton">Start Streaming</button>
    <button id="stopButton" disabled>Stop Streaming</button>
    <div id="status">Status: Ready</div>
    <div id="transcript">Transcript will appear here...</div>

    <script>
        // URL of the AudioWorklet processor file (the port must match your local server)
        const audioProcessorUrl = 'http://localhost:8001/audioProcessorWorklet.js';

        let websocket;
        let audioContext;
        let mediaStream;
        let mediaStreamSource;
        let audioProcessor;
        const statusDiv = document.getElementById('status');
        const transcriptDiv = document.getElementById('transcript');
        const startButton = document.getElementById('startButton');
        const stopButton = document.getElementById('stopButton');
        const BUFFER_SIZE = 2400; // 50 ms at 48 kHz
        const RECORDING_SAMPLE_RATE = 48000; // Assumed capture rate; most browsers default to 48 kHz
        const TARGET_SAMPLE_RATE = 16000;
        const PACKET_DURATION = BUFFER_SIZE / RECORDING_SAMPLE_RATE;
        let packetPosition = 0;

        async function setupRecorder() {
            try {
                mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
                audioContext = new AudioContext();

                mediaStreamSource = audioContext.createMediaStreamSource(mediaStream);
                
                // Load AudioWorklet processor
                await audioContext.audioWorklet.addModule(audioProcessorUrl);
                
                // Create and configure the AudioWorkletNode (assigned to the outer
                // variable so the stop handler can disconnect it later)
                audioProcessor = createAudioWorkletNode(audioContext);
                
                // Connect nodes
                mediaStreamSource.connect(audioProcessor);
                audioProcessor.connect(audioContext.destination);
                
                return true;
            } catch (err) {
                console.error('Error accessing microphone:', err);
                statusDiv.textContent = 'Error: Could not access microphone';
                return false;
            }
        }

        function createAudioWorkletNode(context) {
            const audioProcessor = new AudioWorkletNode(context, 'audioProcessorWorklet');
            audioProcessor.port.onmessage = async (event) => {
                if (event.data.type === 'audioData') {
                    await processAudioData(event.data.data, context);
                }
            };
            
            // Configure processor
            audioProcessor.port.postMessage({ 
                type: 'setSampleRate', 
                sampleRate: RECORDING_SAMPLE_RATE,
                bufferSize: BUFFER_SIZE
            });
            
            return audioProcessor;
        }

        async function processAudioData(inputData, context) {
            // Resample audio data
            const resampledData = await resampleAudio(inputData, context.sampleRate, TARGET_SAMPLE_RATE);
            
            // Convert to Int16 and calculate volume
            const intData = convertToInt16(resampledData);
            const volume = calculateVolume(resampledData);
            
            sendAudioData(intData.buffer, volume);
        }

        function resampleAudio(inputData, inputSampleRate, outputSampleRate) {
            // Create OfflineAudioContext with 1 channel and target sample rate
            const offlineCtx = new OfflineAudioContext(1, inputData.length * (outputSampleRate / inputSampleRate), outputSampleRate);
            
            // Create buffer at original sample rate
            const audioBuffer = offlineCtx.createBuffer(1, inputData.length, inputSampleRate);
            audioBuffer.copyToChannel(inputData, 0);
            
            // Create source and start render
            const source = offlineCtx.createBufferSource();
            source.buffer = audioBuffer;
            source.connect(offlineCtx.destination);
            source.start();
            
            return offlineCtx.startRendering().then(buffer => {
                // Ensure we get exactly one channel of data
                if (buffer.numberOfChannels !== 1) {
                    console.error('Error: Expected 1 channel but got', buffer.numberOfChannels);
                }
                return buffer.getChannelData(0);
            });
        }

        function convertToInt16(floatData) {
            const intData = new Int16Array(floatData.length);
            for (let i = 0; i < floatData.length; i++) {
                // Clamp to [-1, 1] range
                const s = Math.max(-1, Math.min(1, floatData[i]));
                // Convert to 16-bit signed integer
                intData[i] = s < 0 ? Math.floor(s * 32768) : Math.floor(s * 32767);
            }
            return intData;
        }

        function calculateVolume(data) {
            let sum = 0;
            for (let i = 0; i < data.length; i++) {
                sum += data[i] * data[i];
            }
            return Math.sqrt(sum / data.length);
        }

        function connectWebSocket() {
            if (websocket) websocket.close();

            websocket = new WebSocket('ws://localhost:8000/api/v1/dash/ws?api_key=<YOUR_API_KEY>&verbose=true');
            
            websocket.onopen = () => {
                statusDiv.textContent = 'Status: Connected to WebSocket';
                websocket.send(JSON.stringify({
                    type: 'auth',
                    access_token: '<YOUR_ACCESS_TOKEN>',
                    context: {
                        app: {
                            name: "Weather Forecast Chatbot",
                            type: "ai"
                        },
                        dictionary_context: [],
                        user_identifier: "john_doe_1",
                        user_first_name: "John",
                        user_last_name: "Doe",
                        textbox_contents: {
                            before_text: "",
                            selected_text: "",
                            after_text: ""
                        },
                        screenshot: null,
                        content_text: null,
                        content_html: null,
                        conversation: null,
                    },
                    language: ['en'],
                }));
            };

            websocket.onmessage = (event) => {
                const message = JSON.parse(event.data);
                console.log(`Received message: ${JSON.stringify(message)}`);

                if (message.status === 'auth') {
                    statusDiv.textContent = 'Status: Authenticated, ready to stream';
                } else if (message.status === 'info') {
                    // Handle info messages (session_started, chunk_received, etc.)
                    const info = message.message;
                    statusDiv.textContent = `Status: ${info.event}`;
                } else if (message.status === 'text') {
                    // Handle text responses (transcripts)
                    if (message.body.text) {
                        transcriptDiv.textContent = `Transcript: ${message.body.text}`;
                    }
                } else if (message.error) {
                    console.error('WebSocket error:', message.error);
                    statusDiv.textContent = `Error: ${message.error}`;
                }
            };

            websocket.onclose = () => {
                statusDiv.textContent = 'Status: WebSocket connection closed';
            };
            websocket.onerror = (error) => {
                console.error('WebSocket error:', error);
                statusDiv.textContent = 'Error: WebSocket encountered an error';
            };
        }

        function sendAudioData(buffer, volume) {
            if (!websocket || websocket.readyState !== WebSocket.OPEN) return;
            
            const audioBytes = new Uint8Array(buffer);
            // Encode the raw 16-bit PCM bytes as base64 for JSON transport
            const base64Audio = btoa(String.fromCharCode(...audioBytes));
            websocket.send(JSON.stringify({
                type: 'append',
                position: packetPosition,
                audio_packets: {
                    packets: [base64Audio],
                    volumes: [volume],
                    packet_duration: PACKET_DURATION,
                    audio_encoding: 'wav',
                    byte_encoding: 'base64'
                }
            }));
            packetPosition++;
        }

        startButton.addEventListener('click', async () => {
            packetPosition = 0;
            const setup = await setupRecorder();
            if (!setup) return;
            connectWebSocket();
            startButton.disabled = true;
            stopButton.disabled = false;
            statusDiv.textContent = 'Status: Recording and streaming...';
        });

        stopButton.addEventListener('click', async () => {
            if (audioProcessor) {
                audioProcessor.disconnect();
                mediaStreamSource.disconnect();
                audioProcessor = null;
            }
            if (audioContext && audioContext.state !== 'closed') {
                // Close the context to release audio processing resources
                await audioContext.close();
                audioContext = null;
            }
            if (mediaStream) {
                // Stop the microphone track so the browser releases the mic
                mediaStream.getTracks().forEach(track => track.stop());
                mediaStream = null;
            }
            
            if (websocket && websocket.readyState === WebSocket.OPEN) {
                websocket.send(JSON.stringify({
                    type: 'commit',
                    total_packets: packetPosition,
                }));
            }
            startButton.disabled = false;
            stopButton.disabled = true;
            statusDiv.textContent = 'Status: Stopped streaming';
        });
    </script>
</body>
</html>

audioProcessorWorklet.js

class AudioProcessor extends AudioWorkletProcessor {
    constructor() {
        super();
        this.port.onmessage = (event) => {
            if (event.data.type === 'setSampleRate') {
                this.recordingSampleRate = event.data.sampleRate;
                this.bufferSize = event.data.bufferSize;
                this.port.postMessage({ type: 'ready' });
            }
        };
        this.buffer = [];
    }

    process(inputs, outputs, parameters) {
        const inputData = inputs[0][0];
        // The input can be empty before the microphone starts delivering data
        if (!inputData) return true;

        // Copy the samples: the underlying input buffer is reused between calls
        const floatData = new Float32Array(inputData);
        this.buffer.push(...floatData);
        
        // Send data when we have enough samples
        if (this.buffer.length >= this.bufferSize) {
            const chunk = new Float32Array(this.buffer.slice(0, this.bufferSize));
            this.port.postMessage({
                type: 'audioData',
                data: chunk
            });
            this.buffer = this.buffer.slice(this.bufferSize);
        }
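        // Returning true keeps the processor alive for the next render quantum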
        return true;
    }
}

registerProcessor('audioProcessorWorklet', AudioProcessor);
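
Note that AudioWorklet process() callbacks receive audio in fixed 128-sample render quanta, which is why the processor accumulates samples until it has BUFFER_SIZE of them (2400 samples, or 50 ms at 48 kHz) before posting a chunk to the main thread.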

Key Features

This sample project demonstrates several important features:
  1. WebSocket Connection: Establishes a WebSocket connection with verbose mode enabled (the sample uses ws:// against localhost; use wss:// in production)
  2. Audio Processing:
    • Captures audio from the microphone
    • Resamples to 16kHz
    • Converts to the correct format (16-bit PCM WAV)
  3. Streaming Protocol:
    • Sends audio in chunks
    • Tracks packet positions
    • Calculates audio volumes
    • Sends a final commit message with the total packet count
  4. Response Handling:
    • Authentication responses
    • Info messages (in verbose mode)
    • Transcription updates
    • Error handling
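
For reference, the onmessage handler above expects responses shaped roughly like the following. These are illustrative sketches based on the handler's field accesses; consult the API reference for the canonical schema:

{ "status": "auth" }
{ "status": "info", "message": { "event": "session_started" } }
{ "status": "text", "body": { "text": "What's the weather like today?" } }
{ "error": "<error description>" }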

Usage

  1. Replace <YOUR_API_KEY> with your actual API key
  2. Replace <YOUR_ACCESS_TOKEN> with your access token
  3. Save the two files as webSocketClient.html and audioProcessorWorklet.js
  4. Serve both files from a local server and open the page in a modern web browser
  5. Click “Start Streaming” to begin recording and transcribing
  6. Click “Stop Streaming” to end the session
Make sure you’re using a modern browser that supports the Web Audio API (including AudioWorklet) and WebSocket connections. Microphone access requires a secure context, so serve the sample over HTTPS or from localhost. Safari is known to handle microphone access differently.
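
As a quick sanity check, you can run a sketch like this in the browser's developer console to confirm the required APIs exist:

if (!window.AudioWorkletNode || !navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
    console.warn('This browser may not support the APIs this sample relies on.');
}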