This guide provides a complete working example of how to use our REST API for audio transcription. The sample project demonstrates recording audio in the browser and sending it to our API for processing.

Sample Project

Below is a complete HTML file that demonstrates:
  • Browser-based audio recording
  • Audio format conversion (WebM to 16kHz WAV)
  • Base64 encoding
  • REST API integration
  • Response handling
You can save this as an HTML file and run it directly in your browser:
<!DOCTYPE html>
<html>
<head>
    <title>Audio Recorder</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
        }
        button {
            padding: 12px 24px;
            font-size: 16px;
            margin: 10px;
            cursor: pointer;
        }
        #status {
            margin: 20px 0;
            color: #666;
        }
        .file-input {
            margin: 20px 0;
        }
        #audioPlayer {
            margin: 20px 0;
            width: 100%;
        }
    </style>
</head>
<body>
    <h1>Audio Recorder</h1>
    <button id="recordButton">Start Recording</button>
    <audio id="audioPlayer" controls></audio>
    <div id="status">Status: Ready</div>
    <div id="response"></div>

    <script>
        // Recorder state: the MediaRecorder instance is created lazily on first click.
        let mediaRecorder;
        // Raw WebM chunks delivered by MediaRecorder while recording.
        let audioChunks = [];
        // Most recent recording, converted to 16 kHz mono WAV.
        let currentAudioBlob = null;
        // Base64 payload of currentAudioBlob, as sent to the API.
        let currentBase64Audio = '';
        // Cached DOM references for the status/response display and controls.
        const statusDiv = document.getElementById('status');
        const responseDiv = document.getElementById('response');
        const recordButton = document.getElementById('recordButton');
        const audioPlayer = document.getElementById('audioPlayer');
        
        /**
         * Convert a Blob to a base64 string (without the data-URL prefix).
         * @param {Blob} blob - Blob to encode.
         * @returns {Promise<string>} Base64-encoded payload.
         */
        const blobToBase64 = (blob) => {
            return new Promise((resolve, reject) => {
                const reader = new FileReader();
                reader.onloadend = () => {
                    if (typeof reader.result === 'string') {
                        // Strip the "data:*/*;base64," prefix; keep only the payload.
                        resolve(reader.result.split(',')[1]);
                    } else {
                        reject(new Error('Failed to convert blob to base64'));
                    }
                };
                reader.onerror = reject;
                reader.readAsDataURL(blob);
            });
        };

        /**
         * Convert a WebM audio blob to a mono 16 kHz, 16-bit PCM WAV blob.
         *
         * Steps: decode the WebM audio, resample to 16 kHz mono with an
         * OfflineAudioContext, then serialize the samples behind a standard
         * 44-byte RIFF/WAVE header.
         *
         * @param {Blob} webmBlob - Recorded audio as produced by MediaRecorder.
         * @returns {Promise<Blob>} WAV blob of type 'audio/wav'.
         */
        const convertWebMToWAV = async (webmBlob) => {
            const audioContext = new AudioContext();
            let audioBuffer;
            try {
                const arrayBuffer = await webmBlob.arrayBuffer();
                audioBuffer = await audioContext.decodeAudioData(arrayBuffer);
            } finally {
                // Release the audio-hardware handle: browsers cap the number of
                // concurrently open AudioContexts, and this one is only needed
                // for decoding.
                await audioContext.close();
            }

            // Output length at the 16 kHz target rate.
            const targetSampleRate = 16000;
            const resampleRatio = targetSampleRate / audioBuffer.sampleRate;
            const newLength = Math.floor(audioBuffer.length * resampleRatio);

            // Render through an OfflineAudioContext to perform the resampling.
            const offlineAudioContext = new OfflineAudioContext(
                1, // mono channel
                newLength,
                targetSampleRate
            );

            const source = offlineAudioContext.createBufferSource();
            source.buffer = audioBuffer;
            source.connect(offlineAudioContext.destination);
            source.start(0);
            const renderedBuffer = await offlineAudioContext.startRendering();

            // 44-byte WAV header + 2 bytes per 16-bit mono sample.
            const numberOfChannels = 1;
            const length = renderedBuffer.length * numberOfChannels * 2 + 44;
            const buffer = new ArrayBuffer(length);
            const view = new DataView(buffer);

            // Write an ASCII tag (e.g. 'RIFF') byte-by-byte at the given offset.
            const writeString = (view, offset, string) => {
                for (let i = 0; i < string.length; i++) {
                    view.setUint8(offset + i, string.charCodeAt(i));
                }
            };

            let offset = 0;

            // RIFF chunk descriptor
            writeString(view, offset, 'RIFF');
            offset += 4;
            view.setUint32(offset, 36 + renderedBuffer.length * numberOfChannels * 2, true);
            offset += 4;
            writeString(view, offset, 'WAVE');
            offset += 4;

            // fmt sub-chunk (16-byte PCM format block)
            writeString(view, offset, 'fmt ');
            offset += 4;
            view.setUint32(offset, 16, true); // fmt sub-chunk size
            offset += 4;
            view.setUint16(offset, 1, true); // audio format: PCM
            offset += 2;
            view.setUint16(offset, numberOfChannels, true); // mono
            offset += 2;
            view.setUint32(offset, targetSampleRate, true); // sample rate
            offset += 4;
            view.setUint32(offset, targetSampleRate * numberOfChannels * 2, true); // byte rate
            offset += 4;
            view.setUint16(offset, numberOfChannels * 2, true); // block align
            offset += 2;
            view.setUint16(offset, 16, true); // bits per sample
            offset += 2;

            // data sub-chunk
            writeString(view, offset, 'data');
            offset += 4;
            view.setUint32(offset, renderedBuffer.length * numberOfChannels * 2, true);
            offset += 4;

            // Write PCM samples. Clamp to [-1, 1] first: resampling can overshoot
            // slightly, and out-of-range values wrap around in setInt16, which
            // turns loud peaks into audible clicks.
            const channelData = renderedBuffer.getChannelData(0);
            for (let i = 0; i < renderedBuffer.length; i++) {
                const sample = Math.max(-1, Math.min(1, channelData[i]));
                const intSample = sample < 0 ? sample * 0x8000 : sample * 0x7FFF;
                view.setInt16(offset, intSample, true);
                offset += 2;
            }

            return new Blob([view], { type: 'audio/wav' });
        }

        // Request microphone access and set up recorder
        async function setupRecorder() {
            try {
                const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
                mediaRecorder = new MediaRecorder(stream);
                
                mediaRecorder.ondataavailable = (event) => {
                    audioChunks.push(event.data);
                };

                mediaRecorder.onstop = async () => {
                    webmBlob = new Blob(audioChunks, { type: 'audio/webm' });
                    currentAudioBlob = await convertWebMToWAV(webmBlob);
                    currentBase64Audio = await blobToBase64(currentAudioBlob);
                    audioPlayer.src = URL.createObjectURL(currentAudioBlob);
                    await sendAudioToServer(currentAudioBlob);
                    audioChunks = [];
                };

                return true;
            } catch (err) {
                console.error('Error accessing microphone:', err);
                statusDiv.textContent = 'Error: Could not access microphone';
                return false;
            }
        }

        /**
         * POST the recorded audio to the transcription API and render the result.
         *
         * @param {Blob} audioBlob - 16 kHz mono WAV blob to transcribe.
         */
        async function sendAudioToServer(audioBlob) {
            try {
                statusDiv.textContent = 'Status: Sending to server...';
                responseDiv.textContent = '';

                // Encode the blob we were handed instead of silently reading the
                // module-level currentBase64Audio — the parameter was previously unused.
                const base64Audio = await blobToBase64(audioBlob);

                const startTime = performance.now();
                const response = await fetch('https://api.wisprflow.ai/api/v1/dash/api', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json',
                        'Authorization': 'Bearer <YOUR_API_KEY>',
                    },
                    body: JSON.stringify({
                        audio: base64Audio,
                        language: ["en"],
                        context: {
                            app: {
                                type: "email"
                            },
                            dictionary_context: [],
                            textbox_contents: {
                                before_text: "",
                                selected_text: "",
                                after_text: ""
                            },
                            // ... for a full list of available fields, see the "Request Schema" page
                        }
                    })
                });
                const apiDuration = (performance.now() - startTime) / 1000;
                console.log(`API call took ${apiDuration}s`);

                if (!response.ok) {
                    // Include the server's error body, not just the status code,
                    // so authentication/validation failures are diagnosable.
                    const detail = await response.text().catch(() => '');
                    throw new Error(`Server returned ${response.status}${detail ? `: ${detail}` : ''}`);
                }

                const result = await response.json();
                statusDiv.textContent = 'Status: Success!';
                responseDiv.textContent = 'Server Response: ' + JSON.stringify(result, null, 2) + "\nAPI Duration: " + apiDuration;
            } catch (err) {
                console.error('Error sending audio:', err);
                statusDiv.textContent = 'Status: Error sending audio to server';
                responseDiv.textContent = 'Error: ' + err.message;
            }
        }

        // Toggle recording on button click; the recorder is initialized lazily
        // on the first press (which triggers the microphone-permission prompt).
        recordButton.addEventListener('click', async () => {
            if (!mediaRecorder && !(await setupRecorder())) {
                return; // microphone unavailable — setupRecorder already set the status
            }

            const isIdle = mediaRecorder.state === 'inactive';
            if (isIdle) {
                audioChunks = [];
                mediaRecorder.start();
                responseDiv.textContent = '';
            } else {
                mediaRecorder.stop();
            }
            recordButton.textContent = isIdle ? 'Stop Recording' : 'Start Recording';
            statusDiv.textContent = isIdle ? 'Status: Recording...' : 'Status: Processing...';
        });
    </script>
</body>
</html>

Key Features

This sample project demonstrates several important features:
  1. Audio Recording:
    • Browser-based audio recording using the MediaRecorder API
    • Automatic conversion from WebM to WAV format
    • Resampling to 16kHz mono audio
  2. Audio Processing:
    • WAV file generation with proper headers
    • Base64 encoding for API transmission
  3. API Integration:
    • REST API authentication
    • JSON payload formatting
    • Response handling and display
  4. User Interface:
    • Simple recording controls
    • Audio playback
    • Status updates
    • Response display

Usage

  1. Replace <YOUR_API_KEY> in the Authorization header with your actual API key
  2. Save the file with a .html extension
  3. Open in a modern web browser
  4. Click “Start Recording” to begin recording
  5. Click “Stop Recording” to stop and send to the API
  6. View the transcription response below the audio player
Make sure you’re using a modern browser that supports the MediaRecorder API and WebAudio API. The sample requires HTTPS or localhost for microphone access.