> ## Documentation Index
> Fetch the complete documentation index at: https://docs.orq.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# LLM response streaming

> Enable real-time streaming for LLM responses. Deliver incremental content for better UX with Server-Sent Events, React hooks, and error handling patterns.

**Use Cases**

* Chat UIs that show responses as they arrive, before generation completes.
* Long-form generation (reports, code) where waiting for the full output hurts UX.
* Agent workflows that surface reasoning steps or tool calls in real time.
* Reducing perceived latency on slow models or large outputs.

***

## Quick Start

Set `stream: true` on a request to receive the response incrementally, as it is generated, instead of waiting for the full completion.

<CodeGroup>
  ```bash cURL theme={"theme":{"light":"github-light","dark":"github-dark"}}
  curl -N -X POST https://api.orq.ai/v3/router/responses \
    -H "Authorization: Bearer $ORQ_API_KEY" \
    -H "Content-Type: application/json" \
    -d '{
      "model": "openai/gpt-5.4",
      "input": "Write a story about space exploration",
      "stream": true
    }'
  ```

  ```typescript TypeScript theme={"theme":{"light":"github-light","dark":"github-dark"}}
  import OpenAI from "openai";

  const client = new OpenAI({
    apiKey: process.env.ORQ_API_KEY,
    baseURL: "https://api.orq.ai/v3/router",
  });

  const stream = await client.responses.create({
    model: "openai/gpt-5.4",
    input: "Write a story about space exploration",
    stream: true,
  });

  for await (const event of stream) {
    if (event.type === "response.output_text.delta") {
      process.stdout.write(event.delta);
    }
  }
  ```

  ```python Python theme={"theme":{"light":"github-light","dark":"github-dark"}}
  from openai import OpenAI
  import os

  client = OpenAI(
      api_key=os.environ.get("ORQ_API_KEY"),
      base_url="https://api.orq.ai/v3/router",
  )

  stream = client.responses.create(
      model="openai/gpt-5.4",
      input="Write a story about space exploration",
      stream=True,
  )

  for event in stream:
      if event.type == "response.output_text.delta":
          print(event.delta, end="", flush=True)
  ```

  ```typescript TypeScript (Chat Completions) theme={"theme":{"light":"github-light","dark":"github-dark"}}
  import OpenAI from "openai";

  const client = new OpenAI({
    apiKey: process.env.ORQ_API_KEY,
    baseURL: "https://api.orq.ai/v3/router",
  });

  const stream = await client.chat.completions.create({
    model: "openai/gpt-5.4",
    messages: [
      { role: "user", content: "Write a story about space exploration" },
    ],
    stream: true,
  });

  for await (const chunk of stream) {
    const content = chunk.choices[0]?.delta?.content || "";
    if (content) {
      process.stdout.write(content);
    }
  }
  ```

  ```python Python (Chat Completions) theme={"theme":{"light":"github-light","dark":"github-dark"}}
  from openai import OpenAI
  import os

  client = OpenAI(
      api_key=os.environ.get("ORQ_API_KEY"),
      base_url="https://api.orq.ai/v3/router",
  )

  stream = client.chat.completions.create(
      model="openai/gpt-5.4",
      messages=[{"role": "user", "content": "Write a story about space exploration"}],
      stream=True,
  )

  for chunk in stream:
      if chunk.choices and chunk.choices[0].delta.content is not None:
          print(chunk.choices[0].delta.content, end="", flush=True)
  ```
</CodeGroup>

## Configuration

| Parameter | Type    | Required | Description                |
| --------- | ------- | -------- | -------------------------- |
| `stream`  | boolean | Yes      | Enable streaming responses |

**All models support streaming**: no additional configuration needed.

## Response Format

**Streaming chunks:**

<CodeGroup>
  ```json JSON theme={"theme":{"light":"github-light","dark":"github-dark"}}
  {
    "type": "response.output_text.delta",
    "delta": "Hello"
  }
  ```

  ```json JSON (Chat Completions) theme={"theme":{"light":"github-light","dark":"github-dark"}}
  {
    "id": "chatcmpl-123",
    "object": "chat.completion.chunk",
    "created": 1677652288,
    "model": "openai/gpt-5.4",
    "choices": [
      {
        "index": 0,
        "delta": {
          "content": "Hello"
        },
        "finish_reason": null
      }
    ]
  }
  ```
</CodeGroup>

**Final chunk:**

<CodeGroup>
  ```json JSON theme={"theme":{"light":"github-light","dark":"github-dark"}}
  {
    "type": "response.output_text.done"
  }
  ```

  ```json JSON (Chat Completions) theme={"theme":{"light":"github-light","dark":"github-dark"}}
  {
    "choices": [
      {
        "index": 0,
        "delta": {},
        "finish_reason": "stop"
      }
    ]
  }
  ```
</CodeGroup>

## Code examples

<CodeGroup>
  ```bash cURL theme={"theme":{"light":"github-light","dark":"github-dark"}}
  curl -N -X POST https://api.orq.ai/v3/router/responses \
    -H "Authorization: Bearer $ORQ_API_KEY" \
    -H "Content-Type: application/json" \
    -d '{
      "model": "openai/gpt-5.4",
      "input": "Write a detailed explanation of quantum computing",
      "stream": true
    }'
  ```

  ```bash cURL (Chat Completions) theme={"theme":{"light":"github-light","dark":"github-dark"}}
  curl -N -X POST https://api.orq.ai/v3/router/chat/completions \
    -H "Authorization: Bearer $ORQ_API_KEY" \
    -H "Content-Type: application/json" \
    -d '{
      "model": "openai/gpt-5.4",
      "messages": [
        {
          "role": "user",
          "content": "Write a detailed explanation of quantum computing"
        }
      ],
      "stream": true
    }'
  ```

  ```typescript TypeScript theme={"theme":{"light":"github-light","dark":"github-dark"}}
  import OpenAI from "openai";

  const client = new OpenAI({
    apiKey: process.env.ORQ_API_KEY,
    baseURL: "https://api.orq.ai/v3/router",
  });

  const stream = await client.responses.create({
    model: "openai/gpt-5.4",
    input: "Write a detailed explanation of quantum computing",
    stream: true,
  });

  for await (const event of stream) {
    if (event.type === "response.output_text.delta") {
      process.stdout.write(event.delta);
    }
  }
  ```

  ```python Python theme={"theme":{"light":"github-light","dark":"github-dark"}}
  from openai import OpenAI
  import os

  client = OpenAI(
      api_key=os.environ.get("ORQ_API_KEY"),
      base_url="https://api.orq.ai/v3/router",
  )

  stream = client.responses.create(
      model="openai/gpt-5.4",
      input="Write a detailed explanation of quantum computing",
      stream=True,
  )

  for event in stream:
      if event.type == "response.output_text.delta":
          print(event.delta, end="", flush=True)
  ```

  ```typescript TypeScript (Chat Completions) theme={"theme":{"light":"github-light","dark":"github-dark"}}
  import OpenAI from "openai";

  const client = new OpenAI({
    apiKey: process.env.ORQ_API_KEY,
    baseURL: "https://api.orq.ai/v3/router",
  });

  const stream = await client.chat.completions.create({
    model: "openai/gpt-5.4",
    messages: [
      {
        role: "user",
        content: "Write a detailed explanation of quantum computing",
      },
    ],
    stream: true,
  });

  for await (const chunk of stream) {
    process.stdout.write(chunk.choices[0]?.delta?.content || "");
  }
  ```

  ```python Python (Chat Completions) theme={"theme":{"light":"github-light","dark":"github-dark"}}
  from openai import OpenAI
  import os

  client = OpenAI(
      api_key=os.environ.get("ORQ_API_KEY"),
      base_url="https://api.orq.ai/v3/router",
  )

  stream = client.chat.completions.create(
      model="openai/gpt-5.4",
      messages=[
          {
              "role": "user",
              "content": "Write a detailed explanation of quantum computing",
          }
      ],
      stream=True,
  )

  # Print each text delta as soon as it arrives.
  for chunk in stream:
      if chunk.choices and chunk.choices[0].delta.content is not None:
          # flush=True writes the delta immediately instead of leaving it in
          # the stdout buffer until the buffer fills or the program exits.
          print(chunk.choices[0].delta.content, end="", flush=True)
  ```
</CodeGroup>

## Stream Processing Patterns

<Note>
  The examples in this section use the Chat Completions endpoint. The same patterns apply to the Responses API: replace `chat.completions.create(...)` with `responses.create(...)`, update the endpoint to `/v3/router/responses`, and handle `response.output_text.delta` events instead of `choices[0].delta.content`.
</Note>

### Basic processing

Accumulate deltas into a full string and detect completion via `finish_reason`.

<CodeGroup>
  ```typescript TypeScript (Chat Completions) theme={"theme":{"light":"github-light","dark":"github-dark"}}
  /**
   * Consumes a Chat Completions stream, echoing each text delta as it arrives.
   *
   * Iteration stops at the first chunk whose first choice carries a
   * `finish_reason`.
   *
   * @param stream - Async iterable of chat completion chunks.
   * @returns The concatenation of every `delta.content` received.
   */
  const processStream = async (stream) => {
    let fullResponse = "";

    for await (const chunk of stream) {
      // Chunks without any choices are skipped rather than treated as errors.
      const choice = chunk.choices?.[0];

      const content = choice?.delta?.content || "";
      if (content) {
        fullResponse += content;
        // Write without a trailing newline so the deltas read as continuous
        // text; console.log would put every delta on its own line.
        process.stdout.write(content);
      }

      // Check for completion
      if (choice?.finish_reason) {
        console.log(`\nStream finished: ${choice.finish_reason}`);
        break;
      }
    }

    return fullResponse;
  };
  ```

  ```python Python (Chat Completions) theme={"theme":{"light":"github-light","dark":"github-dark"}}
  def process_stream(stream) -> str:
      """Echo each text delta as it arrives and return the accumulated text.

      Iteration stops at the first chunk whose first choice has a truthy
      ``finish_reason``. Chunks with no choices are skipped.
      """
      pieces = []

      for chunk in stream:
          first = chunk.choices[0] if chunk.choices else None

          text = (first.delta.content or "") if first is not None else ""
          if text:
              pieces.append(text)
              print(text, end="", flush=True)

          if first is not None and first.finish_reason:
              print(f"\nStream finished: {first.finish_reason}")
              break

      return "".join(pieces)
  ```
</CodeGroup>

### With error handling

Guard against network drops and unexpected errors by wrapping the stream loop in `try`/`catch` (TypeScript) or `try`/`except` (Python). The TypeScript example additionally applies a 30-second timeout while waiting for chunks.

<CodeGroup>
  ```typescript TypeScript (Chat Completions) theme={"theme":{"light":"github-light","dark":"github-dark"}}
  const updateUI = (content: string) => { process.stdout.write(content); }; // replace with your UI update logic

  /**
   * Consumes a Chat Completions stream with an inactivity timeout.
   *
   * The timeout is re-armed for every chunk, so it limits the wait between
   * chunks rather than the total duration of the stream.
   *
   * @param stream - Async iterable of chat completion chunks.
   * @param timeoutMs - Maximum time to wait for the next chunk, in milliseconds.
   * @returns The concatenation of every `delta.content` received.
   * @throws Error with message "Stream timeout" when no chunk arrives in time;
   *   any error raised by the stream itself is logged and rethrown.
   */
  const robustStreamProcessing = async (stream, timeoutMs = 30000) => {
    const iterator = stream[Symbol.asyncIterator]();
    let response = "";
    let completed = false;

    try {
      while (true) {
        // Race the next chunk against a fresh timer. Rejecting a promise (rather
        // than throwing inside the setTimeout callback) routes the timeout into
        // the surrounding try/catch instead of crashing the process.
        let timer;
        const timedOut = new Promise((_, reject) => {
          timer = setTimeout(() => reject(new Error("Stream timeout")), timeoutMs);
        });

        let result;
        try {
          result = await Promise.race([iterator.next(), timedOut]);
        } finally {
          clearTimeout(timer);
        }

        if (result.done) {
          completed = true;
          break;
        }

        const choice = result.value.choices?.[0];
        if (choice?.delta?.content) {
          response += choice.delta.content;
          // Update UI with new content
          updateUI(choice.delta.content);
        }

        if (choice?.finish_reason) {
          break;
        }
      }

      return response;
    } catch (error) {
      console.error("Streaming error:", error);
      throw error;
    } finally {
      if (!completed) {
        // Early exit (finish_reason, timeout or error): release the stream.
        // NOTE(review): OpenAI SDK streams expose an AbortController as
        // `controller`; plain async iterables do not, hence the optional calls.
        stream.controller?.abort?.();
        // Not awaited: return() on a stalled generator may never settle.
        void Promise.resolve(iterator.return?.()).catch(() => {});
      }
    }
  };
  ```

  ```python Python (Chat Completions) theme={"theme":{"light":"github-light","dark":"github-dark"}}
  import sys

  def update_ui(content: str) -> None:
      """Emit one piece of streamed text to stdout."""
      sys.stdout.write(content)  # replace with actual UI update logic

  def robust_stream_processing(stream) -> str:
      """Accumulate streamed text, forwarding each delta to ``update_ui``.

      Stops at the first chunk whose first choice has a truthy
      ``finish_reason``. Any exception raised while iterating is reported on
      stderr and then re-raised unchanged.
      """
      collected = ""
      try:
          for chunk in stream:
              piece = chunk.choices[0].delta.content if chunk.choices else None
              if piece:
                  collected += piece
                  update_ui(piece)

              finished = chunk.choices[0].finish_reason if chunk.choices else None
              if finished:
                  break
      except Exception as error:
          print(f"Streaming error: {error}", file=sys.stderr)
          raise

      return collected
  ```
</CodeGroup>

## Function Calling with Streaming

Stream tool calls as they're generated:

<CodeGroup>
  ```typescript TypeScript (Chat Completions) theme={"theme":{"light":"github-light","dark":"github-dark"}}
  import OpenAI from "openai";

  const client = new OpenAI({
    apiKey: process.env.ORQ_API_KEY,
    baseURL: "https://api.orq.ai/v3/router",
  });

  const tools = [
    {
      type: "function" as const,
      function: {
        name: "get_weather",
        description: "Get current weather",
        parameters: {
          type: "object",
          properties: { location: { type: "string" } },
          required: ["location"],
        },
      },
    },
  ];

  const stream = await client.chat.completions.create({
    model: "openai/gpt-5.4",
    messages: [{ role: "user", content: "What's the weather in Paris?" }],
    tools,
    stream: true,
  });

  for await (const chunk of stream) {
    if (!chunk.choices.length) continue;
    const delta = chunk.choices[0].delta;
    if (delta.tool_calls?.[0]?.function?.arguments) {
      process.stdout.write(delta.tool_calls[0].function.arguments);
    } else if (delta.content) {
      process.stdout.write(delta.content);
    }
  }
  ```

  ```python Python (Chat Completions) theme={"theme":{"light":"github-light","dark":"github-dark"}}
  from openai import OpenAI
  import os

  client = OpenAI(
      api_key=os.environ.get("ORQ_API_KEY"),
      base_url="https://api.orq.ai/v3/router",
  )

  tools = [
      {
          "type": "function",
          "function": {
              "name": "get_weather",
              "description": "Get current weather",
              "parameters": {
                  "type": "object",
                  "properties": {
                      "location": {"type": "string"}
                  },
                  "required": ["location"]
              }
          }
      }
  ]

  stream = client.chat.completions.create(
      model="openai/gpt-5.4",
      messages=[{"role": "user", "content": "What's the weather in Paris?"}],
      tools=tools,
      stream=True
  )

  # Echo streamed tool-call argument fragments, or plain text deltas.
  for chunk in stream:
      if not chunk.choices:
          continue
      delta = chunk.choices[0].delta

      # Handle tool calls
      if delta.tool_calls:
          tool_call = delta.tool_calls[0]
          # Guard against a delta whose `function` is unset.
          if tool_call.function and tool_call.function.arguments:
              # flush=True so fragments appear as they stream in.
              print(tool_call.function.arguments, end="", flush=True)

      # Handle regular content
      elif delta.content:
          print(delta.content, end="", flush=True)
  ```
</CodeGroup>

## UI Integration Examples

### React hook for streaming

Encapsulate streaming state in a hook so components receive `response` and `isStreaming` without managing the event loop themselves.

<CodeGroup>
  ```typescript TypeScript (Chat Completions) theme={"theme":{"light":"github-light","dark":"github-dark"}}
  import OpenAI from "openai";
  import { useState, useCallback } from "react";

  const client = new OpenAI({
    apiKey: process.env.ORQ_API_KEY,
    baseURL: "https://api.orq.ai/v3/router",
  });

  /**
   * React hook that streams one chat completion into component state.
   *
   * NOTE(review): the module-level `client` is built with an API key. In a
   * browser bundle, prefer calling your own backend so the key is not shipped
   * to users. Confirm how this example is meant to be deployed.
   *
   * @returns `response` (text received so far), `isStreaming` (true while a
   *   request is in flight) and `streamChat(message)` to start a new request.
   */
  const useStreamingChat = () => {
    const [response, setResponse] = useState("");
    const [isStreaming, setIsStreaming] = useState(false);

    const streamChat = useCallback(async (message: string) => {
      setIsStreaming(true);
      setResponse("");

      try {
        const stream = await client.chat.completions.create({
          model: "openai/gpt-5.4",
          messages: [{ role: "user", content: message }],
          stream: true,
        });

        for await (const chunk of stream) {
          const choice = chunk.choices[0];

          const content = choice?.delta?.content || "";
          if (content) {
            // Functional update: chunks can arrive faster than React re-renders.
            setResponse((prev) => prev + content);
          }

          if (choice?.finish_reason) {
            break;
          }
        }
      } catch (error) {
        console.error("Streaming failed:", error);
      } finally {
        // Always clear the flag, including when the stream ends without a
        // finish_reason chunk, so the UI cannot stay stuck in "streaming".
        setIsStreaming(false);
      }
    }, []);

    return { response, isStreaming, streamChat };
  };
  ```
</CodeGroup>

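### Server-Sent Events (browser)

Read the SSE stream from your own backend endpoint with `fetch` and a stream reader, buffering partial lines between reads.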

<CodeGroup>
  ```typescript TypeScript (Chat Completions) theme={"theme":{"light":"github-light","dark":"github-dark"}}
  /**
   * Posts `message` to `/api/chat-stream` and appends the streamed text deltas
   * to the element with id `response`.
   *
   * @param message - User message sent as the JSON body `{ message }`.
   * @throws Error when the request fails, the response has no body, or the
   *   output element is missing. Malformed JSON in a `data:` line also throws.
   */
  const streamWithSSE = async (message: string): Promise<void> => {
    const response = await fetch("/api/chat-stream", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ message }),
    });

    if (!response.ok || !response.body) {
      throw new Error(`Request failed: ${response.status}`);
    }

    const output = document.getElementById("response");
    if (!output) {
      throw new Error('Element with id "response" not found');
    }

    const reader = response.body.getReader();
    const decoder = new TextDecoder();
    let buffer = "";

    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        // A network read can end mid-line; keep the unfinished tail in `buffer`.
        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split("\n");
        buffer = lines.pop() ?? "";

        for (const rawLine of lines) {
          const line = rawLine.replace(/\r$/, ""); // tolerate CRLF line endings
          // [DONE] ends the whole stream, not just the current batch of lines.
          if (line === "data: [DONE]") return;
          if (!line.startsWith("data: ")) continue;

          const data = JSON.parse(line.slice(6));
          const content = data.choices?.[0]?.delta?.content;
          // Append as a text node: model output is untrusted and must not be
          // parsed as HTML (innerHTML would allow markup/script injection).
          if (content) output.append(content);
        }
      }
    } finally {
      // Release the connection, including when returning early on [DONE].
      void reader.cancel().catch(() => {});
    }
  };
  ```
</CodeGroup>

## Performance Optimization

### Chunk buffering

Batching small chunks before flushing to the UI reduces render cycles and smooths perceived output.

<CodeGroup>
  ```typescript TypeScript theme={"theme":{"light":"github-light","dark":"github-dark"}}
  /**
   * Batches small text chunks and hands them to `onFlush` in larger pieces.
   *
   * Flushing is only evaluated inside `add()`, so call `flush()` once after the
   * stream ends to emit whatever is still buffered.
   */
  class StreamBuffer {
    private buffer = "";
    private lastFlush = Date.now();

    /**
     * @param flushInterval - Minimum time between time-based flushes, in ms.
     * @param maxBufferLength - Flush as soon as the buffer grows beyond this
     *   many characters, regardless of elapsed time.
     */
    constructor(
      private readonly flushInterval = 50,
      private readonly maxBufferLength = 100,
    ) {}

    /** Appends `content` and flushes if the interval elapsed or the buffer is large. */
    add(content: string): void {
      this.buffer += content;

      const intervalElapsed = Date.now() - this.lastFlush > this.flushInterval;
      const bufferLarge = this.buffer.length > this.maxBufferLength;
      if (intervalElapsed || bufferLarge) {
        this.flush();
      }
    }

    /** Emits the buffered text, if any, and restarts the flush interval. */
    flush(): void {
      if (this.buffer) {
        this.onFlush(this.buffer);
        this.buffer = "";
        this.lastFlush = Date.now();
      }
    }

    /** Receives each flushed batch; override to update your UI. */
    onFlush(content: string): void {
      // Override this method
      console.log(content);
    }
  }
  ```

  ```python Python theme={"theme":{"light":"github-light","dark":"github-dark"}}
  import time
  import sys

  class StreamBuffer:
      """Batch small text chunks and hand them to ``on_flush`` in larger pieces.

      Flushing is only evaluated inside ``add()``, so call ``flush()`` once
      after the stream ends to emit whatever is still buffered.

      Args:
          flush_interval: Minimum time between time-based flushes, in seconds.
          max_buffer_length: Flush as soon as the buffer grows beyond this many
              characters, regardless of elapsed time.
      """

      def __init__(self, flush_interval: float = 0.05, max_buffer_length: int = 100):
          self._buffer = ""
          self._flush_interval = flush_interval
          self._max_buffer_length = max_buffer_length
          # monotonic() is unaffected by wall-clock adjustments, so elapsed-time
          # comparisons cannot go negative or jump.
          self._last_flush = time.monotonic()

      def add(self, content: str) -> None:
          """Append ``content`` and flush if the interval elapsed or the buffer is large."""
          self._buffer += content
          interval_elapsed = time.monotonic() - self._last_flush > self._flush_interval
          if interval_elapsed or len(self._buffer) > self._max_buffer_length:
              self.flush()

      def flush(self) -> None:
          """Emit the buffered text, if any, and restart the flush interval."""
          if self._buffer:
              self.on_flush(self._buffer)
              self._buffer = ""
              self._last_flush = time.monotonic()

      def on_flush(self, content: str) -> None:
          """Receive each flushed batch; writes to stdout by default."""
          sys.stdout.write(content)  # override in subclass
  ```
</CodeGroup>

### Memory management

For long completions, cap accumulation to avoid unbounded memory growth.

<CodeGroup>
  ```typescript TypeScript (Chat Completions) theme={"theme":{"light":"github-light","dark":"github-dark"}}
  /**
   * Accumulates streamed text up to a hard size cap.
   *
   * @param stream - Async iterable of chat completion chunks.
   * @param maxMemory - Maximum length of the returned string, counted in
   *   UTF-16 code units (`String.length`), not bytes.
   * @returns The received text, cut off at `maxMemory` if the stream is longer.
   */
  const processLargeStream = async (stream, maxMemory = 1000000) => {
    let totalLength = 0;
    const chunks: string[] = [];

    for await (const chunk of stream) {
      const choice = chunk.choices?.[0];
      const content = choice?.delta?.content || "";

      if (content) {
        const remaining = maxMemory - totalLength;

        // Prevent memory overflow: keep only the part that still fits, so the
        // result never exceeds the cap. (The cut may fall inside a surrogate pair.)
        if (content.length > remaining) {
          if (remaining > 0) {
            chunks.push(content.slice(0, remaining));
          }
          console.warn("Stream too large, truncating");
          break;
        }

        totalLength += content.length;
        chunks.push(content);
      }

      if (choice?.finish_reason) {
        break;
      }
    }

    return chunks.join("");
  };
  ```

  ```python Python (Chat Completions) theme={"theme":{"light":"github-light","dark":"github-dark"}}
  import sys

  def process_large_stream(stream, max_memory: int = 1_000_000) -> str:
      """Accumulate streamed text up to a hard size cap.

      Args:
          stream: Iterable of chat completion chunks.
          max_memory: Maximum length of the returned string, in characters
              (not bytes).

      Returns:
          The received text, cut off at ``max_memory`` if the stream is longer.
      """
      total_length = 0
      chunks = []

      for chunk in stream:
          choice = chunk.choices[0] if chunk.choices else None
          content = (choice.delta.content or "") if choice is not None else ""

          if content:
              remaining = max_memory - total_length

              # Keep only the part that still fits so the result never exceeds
              # the cap.
              if len(content) > remaining:
                  if remaining > 0:
                      chunks.append(content[:remaining])
                  print("Stream too large, truncating", file=sys.stderr)
                  break

              total_length += len(content)
              chunks.append(content)

          if choice is not None and choice.finish_reason:
              break

      return "".join(chunks)
  ```
</CodeGroup>

## Best Practices

### Stream management

* Set reasonable timeouts (30-60 seconds).
* Implement proper error boundaries.
* Handle network interruptions gracefully.
* Provide user cancellation options.

### UI/UX considerations

* Show typing indicators during streaming.
* Allow users to stop generation.
* Buffer small chunks for smoother display.
* Handle rapid updates efficiently.

### Error recovery example

<CodeGroup>
  ```typescript TypeScript theme={"theme":{"light":"github-light","dark":"github-dark"}}
  import OpenAI from "openai";

  const client = new OpenAI({
    apiKey: process.env.ORQ_API_KEY,
    baseURL: "https://api.orq.ai/v3/router",
  });

  /**
   * Streams a Responses API request, retrying the whole request on failure.
   *
   * Waits `1000 * attempt` ms between attempts. Text already written to stdout
   * by a failed attempt is not rolled back, so a retry prints the output again
   * from the start; only the successful attempt's text is returned.
   *
   * @param input - Prompt passed as `input`.
   * @param maxRetries - Maximum number of attempts.
   * @returns The full text of the successful attempt, or `""` when
   *   `maxRetries` is less than 1 and no attempt is made.
   * @throws The last error once `maxRetries` attempts have failed.
   */
  const streamWithRetry = async (input: string, maxRetries = 3): Promise<string> => {
    for (let attempt = 1; attempt <= maxRetries; attempt++) {
      try {
        const stream = await client.responses.create({
          model: "openai/gpt-5.4",
          input,
          stream: true,
        });

        let fullResponse = "";
        for await (const event of stream) {
          if (event.type === "response.output_text.delta") {
            fullResponse += event.delta;
            process.stdout.write(event.delta);
          }
        }
        return fullResponse;
      } catch (error) {
        if (attempt === maxRetries) throw error;

        console.log(`Stream attempt ${attempt} failed, retrying...`);
        await new Promise((resolve) => setTimeout(resolve, 1000 * attempt));
      }
    }

    // Only reached when maxRetries < 1; return a string instead of an
    // implicit undefined.
    return "";
  };
  ```

  ```python Python theme={"theme":{"light":"github-light","dark":"github-dark"}}
  from openai import OpenAI
  import os
  import sys
  import time

  client = OpenAI(
      api_key=os.environ.get("ORQ_API_KEY"),
      base_url="https://api.orq.ai/v3/router",
  )

  def stream_responses_with_retry(input_text: str, max_retries: int = 3) -> str:
      """Stream a Responses API request, retrying the whole request on failure.

      Each text delta is written to stdout as it arrives. After a failed
      attempt the function sleeps ``attempt`` seconds and starts over; the last
      failure is re-raised. Returns ``""`` when ``max_retries`` is below 1.
      """
      attempt = 0
      while attempt < max_retries:
          attempt += 1
          try:
              events = client.responses.create(
                  model="openai/gpt-5.4",
                  input=input_text,
                  stream=True,
              )
              collected = ""
              for event in events:
                  if event.type != "response.output_text.delta":
                      continue
                  collected += event.delta
                  sys.stdout.write(event.delta)
                  sys.stdout.flush()
              return collected
          except Exception:
              out_of_attempts = attempt == max_retries
              if out_of_attempts:
                  raise
              print(f"Stream attempt {attempt} failed, retrying...")
              time.sleep(attempt)
      return ""
  ```

  ```typescript TypeScript (Chat Completions) theme={"theme":{"light":"github-light","dark":"github-dark"}}
  import OpenAI from "openai";

  const client = new OpenAI({
    apiKey: process.env.ORQ_API_KEY,
    baseURL: "https://api.orq.ai/v3/router",
  });

  /**
   * Streams a Chat Completions request, retrying the whole request on failure.
   *
   * Waits `1000 * attempt` ms between attempts. Text already written to stdout
   * by a failed attempt is not rolled back, so a retry prints the output again
   * from the start; only the successful attempt's text is returned.
   *
   * @param messages - Conversation to send. `role` is a literal union because a
   *   plain `string` role is not assignable to the SDK's message types.
   * @param maxRetries - Maximum number of attempts.
   * @returns The full text of the successful attempt, or `""` when
   *   `maxRetries` is less than 1 and no attempt is made.
   * @throws The last error once `maxRetries` attempts have failed.
   */
  const streamWithRetry = async (
    messages: { role: "system" | "user" | "assistant"; content: string }[],
    maxRetries = 3,
  ): Promise<string> => {
    for (let attempt = 1; attempt <= maxRetries; attempt++) {
      try {
        const stream = await client.chat.completions.create({
          model: "openai/gpt-5.4",
          messages,
          stream: true,
        });

        let fullResponse = "";
        for await (const chunk of stream) {
          const content = chunk.choices[0]?.delta?.content || "";
          if (content) {
            fullResponse += content;
            process.stdout.write(content);
          }
        }
        return fullResponse;
      } catch (error) {
        if (attempt === maxRetries) throw error;

        console.log(`Stream attempt ${attempt} failed, retrying...`);
        await new Promise((resolve) => setTimeout(resolve, 1000 * attempt));
      }
    }

    // Only reached when maxRetries < 1; return a string instead of an
    // implicit undefined.
    return "";
  };
  ```

  ```python Python (Chat Completions) theme={"theme":{"light":"github-light","dark":"github-dark"}}
  from openai import OpenAI
  import os
  import sys
  import time

  client = OpenAI(
      api_key=os.environ.get("ORQ_API_KEY"),
      base_url="https://api.orq.ai/v3/router",
  )

  def stream_chat_with_retry(messages: list, max_retries: int = 3) -> str:
      """Stream a Chat Completions request, retrying the whole request on failure.

      Each text delta is written to stdout as it arrives. After a failed
      attempt the function sleeps ``attempt`` seconds and starts over; the last
      failure is re-raised. Returns ``""`` when ``max_retries`` is below 1.
      """
      attempt = 0
      while attempt < max_retries:
          attempt += 1
          try:
              chunks = client.chat.completions.create(
                  model="openai/gpt-5.4",
                  messages=messages,
                  stream=True,
              )
              collected = ""
              for chunk in chunks:
                  if chunk.choices:
                      text = chunk.choices[0].delta.content or ""
                  else:
                      text = ""
                  if not text:
                      continue
                  collected += text
                  sys.stdout.write(text)
                  sys.stdout.flush()
              return collected
          except Exception:
              out_of_attempts = attempt == max_retries
              if out_of_attempts:
                  raise
              print(f"Stream attempt {attempt} failed, retrying...")
              time.sleep(attempt)
      return ""
  ```
</CodeGroup>

## Troubleshooting

**Stream cuts off unexpectedly**

* Check network stability.
* Verify timeout settings.
* Monitor for rate limiting.
* Check model-specific limits.

**Slow streaming performance**

* Optimize chunk processing.
* Reduce buffer flush frequency.
* Check network latency.
* Consider model selection.

**Memory issues**

* Implement chunk size limits.
* Use streaming parsers.
* Clear processed chunks.
* Monitor memory usage.

## Limitations

| Limitation               | Impact                  | Workaround                    |
| ------------------------ | ----------------------- | ----------------------------- |
| **Network interruption** | Stream breaks           | Implement reconnection logic  |
| **Processing overhead**  | Slight performance cost | Optimize chunk handling       |
| **Model variations**     | Different chunk sizes   | Handle variable chunk lengths |
| **Rate limiting**        | Stream throttling       | Implement backoff strategies  |

## Advanced Features

<Note>
  The examples in this section use the Chat Completions endpoint. The same patterns apply to the Responses API: replace `chat.completions.create(...)` with `responses.create(...)`. For cURL, use `/v3/router/responses`.
</Note>

### Stream with other Gateway features

**AI Gateway** features like caching, timeouts, and deployment names compose directly with streaming: add them to the same request object.

<CodeGroup>
  ```typescript TypeScript (Chat Completions) theme={"theme":{"light":"github-light","dark":"github-dark"}}
  import OpenAI from "openai";

  const client = new OpenAI({
    apiKey: process.env.ORQ_API_KEY,
    baseURL: "https://api.orq.ai/v3/router",
  });

  const advancedStream = await client.chat.completions.create({
    model: "openai/gpt-5.4",
    messages: [{ role: "user", content: "Explain machine learning" }],
    stream: true,
    name: "StreamingBot-v1",
    cache: { type: "exact_match", ttl: 3600 },
    timeout: { call_timeout: 30000 },
  });
  ```

  ```python Python (Chat Completions) theme={"theme":{"light":"github-light","dark":"github-dark"}}
  from openai import OpenAI
  import os

  client = OpenAI(
      api_key=os.environ.get("ORQ_API_KEY"),
      base_url="https://api.orq.ai/v3/router",
  )

  stream = client.chat.completions.create(
      model="openai/gpt-5.4",
      messages=[{"role": "user", "content": "Explain machine learning"}],
      stream=True,
      extra_body={
          "name": "StreamingBot-v1",
          "cache": {"type": "exact_match", "ttl": 3600},
          "timeout": {"call_timeout": 30000},
      },
  )

  for chunk in stream:
      if chunk.choices and chunk.choices[0].delta.content:
          print(chunk.choices[0].delta.content, end="", flush=True)
  ```
</CodeGroup>

### Parallel streaming

Start multiple streams concurrently with `Promise.all` (TypeScript) or `asyncio.gather` (Python) so that independent requests run in parallel instead of one after another.

<CodeGroup>
  ```typescript TypeScript (Chat Completions) theme={"theme":{"light":"github-light","dark":"github-dark"}}
  import OpenAI from "openai";

  const client = new OpenAI({
    apiKey: process.env.ORQ_API_KEY,
    baseURL: "https://api.orq.ai/v3/router",
  });

  /**
   * Streams one chat completion for `query` and resolves with its full text.
   */
  const processQuery = async (query) => {
    const pieces = [];

    const stream = await client.chat.completions.create({
      model: "openai/gpt-5.4",
      messages: [{ role: "user", content: query }],
      stream: true,
    });

    for await (const { choices } of stream) {
      const text = choices[0]?.delta?.content;
      if (text) {
        pieces.push(text);
      }
    }

    return pieces.join("");
  };

  /**
   * Runs `processQuery` for every query at once and resolves with the results
   * in input order.
   */
  const parallelStreaming = async (queries) => {
    const pending = queries.map(processQuery);
    return Promise.all(pending);
  };
  ```

  ```python Python (Chat Completions) theme={"theme":{"light":"github-light","dark":"github-dark"}}
  from openai import AsyncOpenAI
  import asyncio
  import os

  client = AsyncOpenAI(
      api_key=os.environ.get("ORQ_API_KEY"),
      base_url="https://api.orq.ai/v3/router",
  )

  async def process_query(query: str) -> str:
      """Stream one chat completion for ``query`` and return its full text."""
      chunks = await client.chat.completions.create(
          model="openai/gpt-5.4",
          messages=[{"role": "user", "content": query}],
          stream=True,
      )

      collected = ""
      async for chunk in chunks:
          if chunk.choices:
              text = chunk.choices[0].delta.content or ""
          else:
              text = ""
          if text:
              collected += text
      return collected

  async def parallel_streaming(queries: list[str]) -> list[str]:
      """Run ``process_query`` for every query concurrently, keeping input order."""
      pending = [process_query(query) for query in queries]
      return await asyncio.gather(*pending)
  ```
</CodeGroup>
