{"service":"Omnia Speech-to-Text API","version":"2.1.0","endpoints":{"/stream":{"method":"WebSocket","description":"Real-time streaming transcription","authentication":{"primary":"Sec-WebSocket-Protocol: token, YOUR_API_KEY (Deepgram-style, recommended)","fallback":"Send {\"type\": \"auth\", \"apiKey\": \"YOUR_KEY\"} message after connect"},"audio_requirements":{"recommended_format":"PCM 16-bit signed little-endian (LINEAR16)","sample_rate":"16000 Hz","channels":1,"chunk_size":"640 bytes (20ms of 16kHz 16-bit mono audio)","chunk_interval":"20ms recommended for optimal real-time performance","alternative_formats":["MP3","FLAC","OGG_OPUS","WEBM_OPUS"],"note":"For compressed formats (MP3, FLAC, etc.), send chunks as they become available"},"messages":{"client_to_server":{"audio":"Binary frames containing audio data","control":"{\"type\": \"end\"} to signal end of audio"},"server_to_client":{"ready":"{\"type\": \"ready\", \"sessionId\": \"...\", \"message\": \"...\"}","transcript":"{\"type\": \"transcript\", \"transcript\": \"text\", \"isFinal\": true/false, \"confidence\": 0.95, \"language\": \"fi-FI\"}","error":"{\"type\": \"error\", \"error\": \"message\"}"}},"example":{"javascript":"\n// Recommended: Sec-WebSocket-Protocol authentication (Deepgram-style)\nconst ws = new WebSocket('wss://stt.omnia-voice.com/stream', ['token', 'ck_YOUR_API_KEY']);\n\nws.onopen = () => {\n  // Ready immediately - start sending audio\n  console.log('Connected and authenticated');\n};\n\nws.onmessage = (event) => {\n  const msg = JSON.parse(event.data);\n  if (msg.type === 'ready') {\n    console.log('Session:', msg.sessionId);\n  } else if (msg.type === 'transcript') {\n    console.log(msg.isFinal ? 
'[FINAL]' : '[INTERIM]', msg.transcript);\n  }\n};\n\n// Send 20ms chunks (640 bytes for 16kHz 16-bit mono)\nfunction sendAudioChunk(pcmData) {\n  ws.send(pcmData); // Binary ArrayBuffer or Blob\n}\n"}},"/transcribe":{"method":"POST","description":"Synchronous file transcription (audio up to 60 seconds long)","authentication":"X-API-Key: YOUR_API_KEY header","request":{"content_type":"audio/* (audio/mpeg, audio/wav, audio/flac, audio/ogg, audio/webm, audio/x-raw)","body":"Raw audio file bytes","max_size":"10 MB","max_duration":"60 seconds"},"supported_formats":{"audio/mpeg":"MP3 files","audio/wav":"WAV files (PCM)","audio/flac":"FLAC files","audio/ogg":"OGG Opus files","audio/webm":"WebM Opus files","audio/x-raw":"Raw PCM (16-bit, 16kHz, mono)"},"response":{"transcript":"Full transcription text","segments":"[{transcript, confidence, language, isFinal}]","audioSize":"Size in bytes","processingTime":"Processing time in ms"},"example":{"curl":"\ncurl -X POST https://stt.omnia-voice.com/transcribe \\\n  -H \"X-API-Key: YOUR_API_KEY\" \\\n  -H \"Content-Type: audio/mpeg\" \\\n  --data-binary @audio.mp3\n","javascript":"\nconst response = await fetch('https://stt.omnia-voice.com/transcribe', {\n  method: 'POST',\n  headers: {\n    'X-API-Key': 'YOUR_API_KEY',\n    'Content-Type': 'audio/mpeg'\n  },\n  body: audioFile // File or Blob\n});\nconst result = await response.json();\nconsole.log(result.transcript);\n"}},"/health":{"method":"GET","description":"Health check endpoint","response":{"status":"ok or error","activeConnections":"Number of active WebSocket connections"}},"/metrics":{"method":"GET","description":"Usage statistics and monitoring (requires API key). 
Metrics are persisted in Redis.","authentication":"Required - API key via ?apiKey= query parameter, X-API-Key header, or Authorization: Bearer token","response":{"uptime":"Server uptime in ms, hours, and ISO timestamp","current":{"activeConnections":"Current WebSocket connections","activeStreams":"Current active streaming sessions"},"totals":{"streamingSessions":"Total streaming sessions since startup","transcribeRequests":"Total transcribe requests since startup","streamingDuration":"Total streaming session duration (ms, seconds, minutes)","audioDuration":"Estimated total audio duration processed (ms, seconds, minutes)","bytesProcessed":"Total bytes of audio processed (bytes, MB)"},"averages":{"streamingSessionDurationMs":"Average streaming session duration in ms","audioDurationPerSessionMs":"Average audio duration per session in ms","sessionsPerHour":"Average sessions per hour"}},"duration_tracking":{"streaming":{"description":"Duration is calculated when the WebSocket connection closes","session_duration":"Wall clock time from connect to disconnect","audio_duration":"Estimated from bytes: bytes / (sampleRate * bytesPerSample * channels) * 1000 = milliseconds","formula":"For PCM 16kHz 16-bit mono: bytes / 32000 * 1000 = milliseconds","example":"320000 bytes / 32000 * 1000 = 10000 ms (10 seconds of audio)"},"transcribe":{"description":"Duration is estimated from file size when the request completes","audio_duration":"Estimated based on encoding format and file size"}}}},"audio_recommendations":{"streaming":{"format":"PCM 16-bit signed little-endian","sample_rate":16000,"channels":1,"chunk_duration_ms":20,"chunk_size_bytes":640,"calculation":"sample_rate × bytes_per_sample × channels × duration_seconds = 16000 × 2 × 1 × 0.02 = 640 bytes"},"file_upload":{"preferred_formats":["MP3","FLAC","WAV"],"max_duration_seconds":60,"max_size_mb":10}},"supported_languages":["fi-FI","sv-SE","en-US"],"errors":{"401":"Unauthorized - Invalid or missing API key","413":"File too large - Maximum 10 MB","503":"Service unavailable - Server at capacity"}}