This guide provides a complete working example of how to use our WebSocket API for real-time audio streaming and transcription.

Sample Project

Below is a complete sample project (an HTML page plus an AudioWorklet processor script) that demonstrates:
  • WebSocket connection and authentication
  • Microphone access and audio processing
  • Real-time audio streaming
  • Handling transcription responses
You can save the following two blocks of code as HTML and JavaScript files respectively and serve them from a local server to test the example directly in your browser:
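
Any static file server will do. As a minimal sketch, assuming Node.js is installed, a server like the one below works; the port 8001 matches the audioProcessorUrl hardcoded in the sample:

serve.js

const http = require('http');
const fs = require('fs');
const path = require('path');

// Map file extensions to MIME types so the browser loads the worklet as JavaScript
const MIME_TYPES = { '.html': 'text/html', '.js': 'text/javascript' };

http.createServer((req, res) => {
    // Serve the sample page for the root path
    const fileName = req.url === '/' ? '/webSocketClient.html' : req.url;
    const filePath = path.join(__dirname, fileName);
    fs.readFile(filePath, (err, data) => {
        if (err) {
            res.writeHead(404);
            res.end('Not found');
            return;
        }
        res.writeHead(200, { 'Content-Type': MIME_TYPES[path.extname(filePath)] || 'application/octet-stream' });
        res.end(data);
    });
}).listen(8001, () => console.log('Serving on http://localhost:8001'));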

webSocketClient.html

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>WebSocket Audio Stream</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
        }
        button {
            padding: 12px 24px;
            font-size: 16px;
            margin: 10px;
            cursor: pointer;
        }
        #status {
            margin: 20px 0;
            color: #666;
        }
        #transcript {
            margin: 20px 0;
            border: 1px solid #ccc;
            padding: 10px;
            background-color: #f9f9f9;
        }
    </style>
</head>
<body>
    <h1>Audio Streaming via WebSocket</h1>
    <button id="startButton">Start Streaming</button>
    <button id="stopButton" disabled>Stop Streaming</button>
    <div id="status">Status: Ready</div>
    <div id="transcript">Transcript will appear here...</div>

    <script>
        // URL of the AudioWorklet processor file (the port must match your local server)
        const audioProcessorUrl = 'http://localhost:8001/audioProcessorWorklet.js';

        let websocket;
        let audioContext;
        let mediaStream;
        let mediaStreamSource;
        let audioProcessor;
        const statusDiv = document.getElementById('status');
        const transcriptDiv = document.getElementById('transcript');
        const startButton = document.getElementById('startButton');
        const stopButton = document.getElementById('stopButton');
        const BUFFER_SIZE = 2400; // 50 ms at 48 kHz
        const RECORDING_SAMPLE_RATE = 48000; // Assumed capture rate; most browsers default to 48 kHz
        const TARGET_SAMPLE_RATE = 16000;
        const PACKET_DURATION = BUFFER_SIZE / RECORDING_SAMPLE_RATE;
        let packetPosition = 0;

        async function setupRecorder() {
            try {
                mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
                audioContext = new AudioContext();

                mediaStreamSource = audioContext.createMediaStreamSource(mediaStream);
                
                // Load AudioWorklet processor
                await audioContext.audioWorklet.addModule(audioProcessorUrl);
                
                // Create and configure the AudioWorkletNode (assigned to the outer
                // variable so the stop handler can disconnect it later)
                audioProcessor = createAudioWorkletNode(audioContext);
                
                // Connect nodes
                mediaStreamSource.connect(audioProcessor);
                audioProcessor.connect(audioContext.destination);
                
                return true;
            } catch (err) {
                console.error('Error accessing microphone:', err);
                statusDiv.textContent = 'Error: Could not access microphone';
                return false;
            }
        }

        function createAudioWorkletNode(context) {
            const audioProcessor = new AudioWorkletNode(context, 'audioProcessorWorklet');
            audioProcessor.port.onmessage = async (event) => {
                if (event.data.type === 'audioData') {
                    await processAudioData(event.data.data, context);
                }
            };
            
            // Configure processor
            audioProcessor.port.postMessage({ 
                type: 'setSampleRate', 
                sampleRate: RECORDING_SAMPLE_RATE,
                bufferSize: BUFFER_SIZE
            });
            
            return audioProcessor;
        }

        async function processAudioData(inputData, context) {
            // Resample audio data
            const resampledData = await resampleAudio(inputData, context.sampleRate, TARGET_SAMPLE_RATE);
            
            // Convert to Int16 and calculate volume
            const intData = convertToInt16(resampledData);
            const volume = calculateVolume(resampledData);
            
            sendAudioData(intData.buffer, volume);
        }

        function resampleAudio(inputData, inputSampleRate, outputSampleRate) {
            // Create OfflineAudioContext with 1 channel and target sample rate
            const offlineCtx = new OfflineAudioContext(1, inputData.length * (outputSampleRate / inputSampleRate), outputSampleRate);
            
            // Create buffer at original sample rate
            const audioBuffer = offlineCtx.createBuffer(1, inputData.length, inputSampleRate);
            audioBuffer.copyToChannel(inputData, 0);
            
            // Create source and start render
            const source = offlineCtx.createBufferSource();
            source.buffer = audioBuffer;
            source.connect(offlineCtx.destination);
            source.start();
            
            return offlineCtx.startRendering().then(buffer => {
                // Ensure we get exactly one channel of data
                if (buffer.numberOfChannels !== 1) {
                    console.error('Error: Expected 1 channel but got', buffer.numberOfChannels);
                }
                return buffer.getChannelData(0);
            });
        }

        function convertToInt16(floatData) {
            const intData = new Int16Array(floatData.length);
            for (let i = 0; i < floatData.length; i++) {
                // Clamp to [-1, 1] range
                const s = Math.max(-1, Math.min(1, floatData[i]));
                // Convert to 16-bit signed integer
                intData[i] = s < 0 ? Math.floor(s * 32768) : Math.floor(s * 32767);
            }
            return intData;
        }

        function calculateVolume(data) {
            let sum = 0;
            for (let i = 0; i < data.length; i++) {
                sum += data[i] * data[i];
            }
            return Math.sqrt(sum / data.length);
        }

        function connectWebSocket() {
            if (websocket) websocket.close();

            websocket = new WebSocket('ws://localhost:8000/api/v1/dash/ws?api_key=<YOUR_API_KEY>&verbose=true');
            
            websocket.onopen = () => {
                statusDiv.textContent = 'Status: Connected to WebSocket';
                websocket.send(JSON.stringify({
                    type: 'auth',
                    access_token: '<YOUR_ACCESS_TOKEN>',
                    context: {
                        app: {
                            name: "Weather Forecast Chatbot",
                            type: "ai"
                        },
                        dictionary_context: [],
                        user_identifier: "john_doe_1",
                        user_first_name: "John",
                        user_last_name: "Doe",
                        textbox_contents: {
                            before_text: "",
                            selected_text: "",
                            after_text: ""
                        },
                        screenshot: null,
                        content_text: null,
                        content_html: null,
                        conversation: null,
                    },
                    language: ['en'],
                }));
            };

            websocket.onmessage = (event) => {
                const message = JSON.parse(event.data);
                console.log(`Received message: ${JSON.stringify(message)}`);

                if (message.status === 'auth') {
                    statusDiv.textContent = 'Status: Authenticated, ready to stream';
                } else if (message.status === 'info') {
                    // Handle info messages (session_started, chunk_received, etc.)
                    const info = message.message;
                    statusDiv.textContent = `Status: ${info.event}`;
                } else if (message.status === 'text') {
                    // Handle text responses (transcripts)
                    if (message.body.text) {
                        transcriptDiv.textContent = `Transcript: ${message.body.text}`;
                    }
                } else if (message.error) {
                    console.error('WebSocket error:', message.error);
                    statusDiv.textContent = `Error: ${message.error}`;
                }
            };

            websocket.onclose = () => {
                statusDiv.textContent = 'Status: WebSocket connection closed';
            };
            websocket.onerror = (error) => {
                console.error('WebSocket error:', error);
                statusDiv.textContent = 'Error: WebSocket encountered an error';
            };
        }

        function sendAudioData(buffer, volume) {
            if (!websocket || websocket.readyState !== WebSocket.OPEN) return;
            
            const audioBytes = new Uint8Array(buffer);
            // Encode the raw 16-bit PCM bytes as base64 for JSON transport
            const base64Audio = btoa(String.fromCharCode(...audioBytes));
            websocket.send(JSON.stringify({
                type: 'append',
                position: packetPosition,
                audio_packets: {
                    packets: [base64Audio],
                    volumes: [volume],
                    packet_duration: PACKET_DURATION,
                    audio_encoding: 'wav',
                    byte_encoding: 'base64'
                }
            }));
            packetPosition++;
        }

        startButton.addEventListener('click', async () => {
            packetPosition = 0;
            const setup = await setupRecorder();
            if (!setup) return;
            connectWebSocket();
            startButton.disabled = true;
            stopButton.disabled = false;
            statusDiv.textContent = 'Status: Recording and streaming...';
        });

        stopButton.addEventListener('click', async () => {
            if (audioProcessor) {
                audioProcessor.disconnect();
                mediaStreamSource.disconnect();
                audioProcessor = null;
            }
            if (audioContext && audioContext.state !== 'closed') {
                // Close the context to release audio processing resources
                await audioContext.close();
                audioContext = null;
            }
            if (mediaStream) {
                // Stop the microphone track so the browser releases the mic
                mediaStream.getTracks().forEach(track => track.stop());
                mediaStream = null;
            }
            
            if (websocket && websocket.readyState === WebSocket.OPEN) {
                websocket.send(JSON.stringify({
                    type: 'commit',
                    total_packets: packetPosition,
                }));
            }
            startButton.disabled = false;
            stopButton.disabled = true;
            statusDiv.textContent = 'Status: Stopped streaming';
        });
    </script>
</body>
</html>

audioProcessorWorklet.js

class AudioProcessor extends AudioWorkletProcessor {
    constructor() {
        super();
        this.port.onmessage = (event) => {
            if (event.data.type === 'setSampleRate') {
                this.recordingSampleRate = event.data.sampleRate;
                this.bufferSize = event.data.bufferSize;
                this.port.postMessage({ type: 'ready' });
            }
        };
        this.buffer = [];
    }

    process(inputs, outputs, parameters) {
        const inputData = inputs[0][0];
        // The input can be empty before the microphone starts delivering data
        if (!inputData) return true;

        // Copy the samples: the underlying input buffer is reused between calls
        const floatData = new Float32Array(inputData);
        this.buffer.push(...floatData);
        
        // Send data when we have enough samples
        if (this.buffer.length >= this.bufferSize) {
            const chunk = new Float32Array(this.buffer.slice(0, this.bufferSize));
            this.port.postMessage({
                type: 'audioData',
                data: chunk
            });
            this.buffer = this.buffer.slice(this.bufferSize);
        }
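        // Returning true keeps the processor alive for the next render quantum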
        return true;
    }
}

registerProcessor('audioProcessorWorklet', AudioProcessor);
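
Note that AudioWorklet process() callbacks receive audio in fixed 128-sample render quanta, which is why the processor accumulates samples until it has BUFFER_SIZE of them (2400 samples, or 50 ms at 48 kHz) before posting a chunk to the main thread.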

Key Features

This sample project demonstrates several important features:
  1. WebSocket Connection: Establishes a WebSocket connection with verbose mode enabled (the sample uses ws:// against localhost; use wss:// in production)
  2. Audio Processing:
    • Captures audio from the microphone
    • Resamples to 16kHz
    • Converts to the correct format (16-bit PCM WAV)
  3. Streaming Protocol:
    • Sends audio in chunks
    • Tracks packet positions
    • Calculates audio volumes
    • Sends a final commit message with the total packet count
  4. Response Handling:
    • Authentication responses
    • Info messages (in verbose mode)
    • Transcription updates
    • Error handling
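
For reference, the onmessage handler above expects responses shaped roughly like the following. These are illustrative sketches based on the handler's field accesses; consult the API reference for the canonical schema:

{ "status": "auth" }
{ "status": "info", "message": { "event": "session_started" } }
{ "status": "text", "body": { "text": "What's the weather like today?" } }
{ "error": "<error description>" }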

Usage

  1. Replace <YOUR_API_KEY> with your actual API key
  2. Replace <YOUR_ACCESS_TOKEN> with your access token
  3. Save the two files as webSocketClient.html and audioProcessorWorklet.js
  4. Serve both files from a local server and open the page in a modern web browser
  5. Click “Start Streaming” to begin recording and transcribing
  6. Click “Stop Streaming” to end the session
Make sure you’re using a modern browser that supports the Web Audio API (including AudioWorklet) and WebSocket connections. Microphone access requires a secure context, so serve the sample over HTTPS or from localhost. Safari is known to handle microphone access differently.
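
As a quick sanity check, you can run a sketch like this in the browser's developer console to confirm the required APIs exist:

if (!window.AudioWorkletNode || !navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
    console.warn('This browser may not support the APIs this sample relies on.');
}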