Realtime WebSockets API
Some AI providers support real-time, low-latency interactions over WebSockets. AI Gateway allows seamless integration with these APIs, supporting multimodal interactions such as text, audio, and video.
For real-time WebSockets, authentication can be done using:
- Headers (for non-browser environments)
- `sec-websocket-protocol` (for browsers), as sketched below
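Browsers cannot attach custom headers to a WebSocket connection, so credentials travel in the `sec-websocket-protocol` header instead. A minimal browser-side sketch, assuming the same `header-name.value` subprotocol convention the examples below use:

// Each array entry is sent to the gateway in the sec-websocket-protocol header
const ws = new WebSocket(
  "wss://gateway.ai.cloudflare.com/v1/<account_id>/<gateway>/google?api_key=<google_api_key>",
  ["cf-aig-authorization.<cloudflare_token>"],
);

ws.onopen = () => console.log("Connected to server.");
ws.onmessage = (event) => console.log(event.data);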
The following example connects to the OpenAI Realtime API using header-based authentication:

import WebSocket from "ws";

const url =
  "wss://gateway.ai.cloudflare.com/v1/<account_id>/<gateway>/openai?model=gpt-4o-realtime-preview-2024-12-17";
const ws = new WebSocket(url, {
  headers: {
    // AI Gateway authentication
    "cf-aig-authorization": process.env.CLOUDFLARE_API_KEY,
    // Provider authentication
    Authorization: "Bearer " + process.env.OPENAI_API_KEY,
    "OpenAI-Beta": "realtime=v1",
  },
});

ws.on("open", () => {
  console.log("Connected to server.");
  // Send the request only after the connection is open
  ws.send(
    JSON.stringify({
      type: "response.create",
      response: { modalities: ["text"], instructions: "Tell me a joke" },
    }),
  );
});
ws.on("message", (message) => console.log(JSON.parse(message.toString())));
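To print just the streamed text instead of every raw event, you can filter on event types. A sketch, assuming the Realtime API's `response.text.delta` and `response.done` event names:

ws.on("message", (message) => {
  const event = JSON.parse(message.toString());
  if (event.type === "response.text.delta") {
    // Incremental text tokens as they arrive
    process.stdout.write(event.delta);
  } else if (event.type === "response.done") {
    console.log("\nResponse complete.");
    ws.close();
  }
});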
The following example connects to Google AI Studio using `sec-websocket-protocol` authentication:

import WebSocket from "ws";

const ws = new WebSocket(
  "wss://gateway.ai.cloudflare.com/v1/<account_id>/<gateway>/google?api_key=<google_api_key>",
  ["cf-aig-authorization.<cloudflare_token>"],
);

ws.on("open", () => {
  console.log("Connected to server.");
  // Configure the session before sending any content
  ws.send(
    JSON.stringify({
      setup: {
        model: "models/gemini-2.0-flash-exp",
        generationConfig: { responseModalities: ["TEXT"] },
      },
    }),
  );
});
ws.on("message", (message) => console.log(message.toString()));
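Once the server acknowledges the setup, user turns are sent as `clientContent` messages. A sketch, assuming the Gemini Live API's `turns`/`turnComplete` message shape:

ws.send(
  JSON.stringify({
    clientContent: {
      turns: [{ role: "user", parts: [{ text: "Tell me a joke" }] }],
      turnComplete: true,
    },
  }),
);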
The following example requests streamed TTS audio from Cartesia:

import WebSocket from "ws";

const ws = new WebSocket(
  "wss://gateway.ai.cloudflare.com/v1/<account_id>/<gateway>/cartesia?cartesia_version=2024-06-10&api_key=<cartesia_api_key>",
  ["cf-aig-authorization.<cloudflare_token>"],
);

ws.on("open", () => {
  console.log("Connected to server.");
  ws.send(
    JSON.stringify({
      model_id: "sonic",
      transcript: "Hello, world! I'm generating audio on ",
      voice: { mode: "id", id: "a0e99841-438c-4a64-b679-ae501e7d6091" },
      language: "en",
      context_id: "happy-monkeys-fly",
      output_format: {
        container: "raw",
        encoding: "pcm_s16le",
        sample_rate: 8000,
      },
      add_timestamps: true,
      continue: true,
    }),
  );
});
ws.on("message", (message) => console.log(message.toString()));
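Because the first message sets `continue: true`, more transcript is expected on the same `context_id`. A sketch of finishing the utterance, assuming Cartesia's convention of closing a continuation with `continue: false`:

ws.send(
  JSON.stringify({
    model_id: "sonic",
    // Continues the transcript started above on the same context
    transcript: "Cartesia!",
    voice: { mode: "id", id: "a0e99841-438c-4a64-b679-ae501e7d6091" },
    language: "en",
    context_id: "happy-monkeys-fly",
    output_format: { container: "raw", encoding: "pcm_s16le", sample_rate: 8000 },
    continue: false,
  }),
);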
The following example sends text to ElevenLabs, passing both the provider key and the AI Gateway token as subprotocol entries:

import WebSocket from "ws";

const ws = new WebSocket(
  "wss://gateway.ai.cloudflare.com/v1/<account_id>/<gateway>/elevenlabs?agent_id=<elevenlabs_agent_id>",
  [
    "xi-api-key.<elevenlabs_api_key>",
    "cf-aig-authorization.<cloudflare_token>",
  ],
);

ws.on("open", () => {
  console.log("Connected to server.");
  ws.send(
    JSON.stringify({
      text: "This is a sample text ",
      voice_settings: { stability: 0.8, similarity_boost: 0.8 },
      generation_config: { chunk_length_schedule: [120, 160, 250, 290] },
    }),
  );
});
ws.on("message", (message) => console.log(message.toString()));
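When no more text will follow, a sketch of closing generation cleanly, assuming ElevenLabs' stream-input convention that an empty text field signals end-of-stream:

// An empty text field tells ElevenLabs to finish generating and flush audio
ws.send(JSON.stringify({ text: "" }));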
Fal AI supports WebSocket connections for real-time model interactions through their HTTP over WebSocket API.
import WebSocket from "ws";

const ws = new WebSocket(
  "wss://gateway.ai.cloudflare.com/v1/<account_id>/<gateway>/fal/fal-ai/fast-lcm-diffusion",
  ["fal-api-key.<fal_api_key>", "cf-aig-authorization.<cloudflare_token>"],
);

ws.on("open", () => {
  console.log("Connected to server.");
  ws.send(
    JSON.stringify({
      prompt: "generate an image of a cat flying an aeroplane",
    }),
  );
});
ws.on("message", (message) => console.log(message.toString()));
For more information on Fal AI's WebSocket API, see their HTTP over WebSocket documentation.
Workers AI provides Deepgram models for real-time speech-to-text (STT) and text-to-speech (TTS) capabilities through WebSocket connections.
Workers AI supports two Deepgram STT models: `@cf/deepgram/nova-3` and `@cf/deepgram/flux`. The following example demonstrates real-time audio transcription from a microphone:
import WebSocket from "ws";
import mic from "mic";

const ws = new WebSocket(
  "wss://gateway.ai.cloudflare.com/v1/<account_id>/<gateway>/workers-ai?model=@cf/deepgram/nova-3&encoding=linear16&sample_rate=16000&interim_results=true",
  {
    headers: {
      "cf-aig-authorization": process.env.CLOUDFLARE_API_KEY,
    },
  },
);

// Configure microphone
const micInstance = mic({
  rate: "16000",
  channels: "1",
  debug: false,
  exitOnSilence: 6,
});

const micInputStream = micInstance.getAudioStream();

micInputStream.on("data", (data) => {
  // Forward raw PCM chunks only while the socket is open
  if (ws.readyState === WebSocket.OPEN) {
    ws.send(data);
  }
});

micInputStream.on("error", (error) => {
  console.error("Microphone error:", error);
});

ws.onopen = () => {
  console.log("Connected to WebSocket");
  console.log("Starting microphone...");
  micInstance.start();
};

ws.onmessage = (event) => {
  try {
    const parsed = JSON.parse(event.data);
    const transcript = parsed.channel?.alternatives?.[0]?.transcript;
    if (transcript) {
      if (parsed.is_final) {
        console.log("Final transcript:", transcript);
      } else {
        console.log("Interim transcript:", transcript);
      }
    }
  } catch (error) {
    console.error("Error parsing message:", error);
  }
};

ws.onerror = (error) => {
  console.error("WebSocket error:", error);
};

ws.onclose = () => {
  console.log("WebSocket closed");
  micInstance.stop();
};
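To end a session cleanly rather than dropping the socket mid-stream, you can finalize on Ctrl+C. A sketch, assuming Deepgram's `CloseStream` control message:

process.on("SIGINT", () => {
  micInstance.stop();
  if (ws.readyState === WebSocket.OPEN) {
    // Ask the server to flush any pending transcript, then close
    ws.send(JSON.stringify({ type: "CloseStream" }));
    ws.close();
  }
});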
Workers AI supports the Deepgram `@cf/deepgram/aura-1` model for TTS. The following example demonstrates converting text input to audio:
import WebSocket from "ws";
import readline from "readline";
import Speaker from "speaker";

const ws = new WebSocket(
  "wss://gateway.ai.cloudflare.com/v1/<account_id>/<gateway>/workers-ai?model=@cf/deepgram/aura-1",
  {
    headers: {
      "cf-aig-authorization": process.env.CLOUDFLARE_API_KEY,
    },
  },
);

// Speaker management
let currentSpeaker = null;
let isPlayingAudio = false;

// Setup readline for text input
const rl = readline.createInterface({
  input: process.stdin,
  output: process.stdout,
  prompt: 'Enter text to speak (or "quit" to exit): ',
});

ws.onopen = () => {
  console.log("Connected to Deepgram TTS WebSocket");
  rl.prompt();
};

ws.onmessage = (event) => {
  // Check if message is JSON (metadata, flushed, etc.) or raw audio
  if (event.data instanceof Buffer || event.data instanceof ArrayBuffer) {
    // Raw audio data - create new speaker if needed
    if (!currentSpeaker) {
      currentSpeaker = new Speaker({
        channels: 1,
        bitDepth: 16,
        sampleRate: 24000,
      });
      isPlayingAudio = true;
    }
    currentSpeaker.write(Buffer.from(event.data));
  } else {
    try {
      const message = JSON.parse(event.data);
      switch (message.type) {
        case "Metadata":
          console.log("Model info:", message.model_name, message.model_version);
          break;
        case "Flushed":
          console.log("Audio complete");
          // End speaker after flush to prevent buffer underflow
          if (currentSpeaker && isPlayingAudio) {
            currentSpeaker.end();
            currentSpeaker = null;
            isPlayingAudio = false;
          }
          rl.prompt();
          break;
        case "Cleared":
          console.log("Audio cleared, sequence:", message.sequence_id);
          break;
        case "Warning":
          console.warn("Warning:", message.description);
          break;
      }
    } catch (error) {
      // Not JSON, might be raw audio as string
      if (!currentSpeaker) {
        currentSpeaker = new Speaker({
          channels: 1,
          bitDepth: 16,
          sampleRate: 24000,
        });
        isPlayingAudio = true;
      }
      currentSpeaker.write(Buffer.from(event.data));
    }
  }
};

ws.onerror = (error) => {
  console.error("WebSocket error:", error);
};

ws.onclose = () => {
  console.log("WebSocket closed");
  if (currentSpeaker) {
    currentSpeaker.end();
  }
  rl.close();
  process.exit(0);
};

// Handle user input
rl.on("line", (input) => {
  const text = input.trim();

  if (text.toLowerCase() === "quit") {
    // Send Close message
    ws.send(JSON.stringify({ type: "Close" }));
    ws.close();
    return;
  }

  if (text.length > 0) {
    // Send text to TTS
    ws.send(
      JSON.stringify({
        type: "Speak",
        text: text,
      }),
    );

    // Flush to get audio immediately
    ws.send(JSON.stringify({ type: "Flush" }));
    console.log("Flushing audio");
  }

  rl.prompt();
});

rl.on("close", () => {
  if (ws.readyState === WebSocket.OPEN) {
    ws.close();
  }
});