Skip to main content

AI Router

Overview

Use Azure AI Inference SDK to route all model calls through Orq.ai’s AI Router. Point ChatCompletionsClient at Orq’s endpoint to access 250+ models from 20+ providers — OpenAI, Anthropic, Google, and more — without changing your agent logic.

Key Benefits

Complete Observability

Track every agent step, tool use, and LLM call with detailed traces and analytics

Built-in Reliability

Automatic fallbacks, retries, and load balancing for production resilience

Cost Optimization

Real-time cost tracking and spend management across all your AI operations

Multi-Provider Access

Access 250+ LLMs and 20+ providers through a single, unified integration

Prerequisites

  • An Orq.ai account and API Key
  • Python 3.9 or higher
To set up your API key, see API keys & Endpoints.

Installation

pip install azure-ai-inference azure-core

Configuration

Configure ChatCompletionsClient to point at Orq.ai’s AI Router:
Python
import os

from azure.ai.inference import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential

# Orq.ai's AI Router speaks the same wire protocol as Azure AI Inference,
# so the stock client works unmodified — only the endpoint changes.
ORQ_ROUTER_ENDPOINT = "https://api.orq.ai/v2/router"

client = ChatCompletionsClient(
    endpoint=ORQ_ROUTER_ENDPOINT,
    credential=AzureKeyCredential(os.environ["ORQ_API_KEY"]),
)
All requests are routed through the endpoint: https://api.orq.ai/v2/router

Basic Example

Python
import os

from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import SystemMessage, UserMessage
from azure.core.credentials import AzureKeyCredential

# Authenticate against Orq.ai's router with your Orq API key.
client = ChatCompletionsClient(
    endpoint="https://api.orq.ai/v2/router",
    credential=AzureKeyCredential(os.environ["ORQ_API_KEY"]),
)

# Model ids follow the "<provider>/<model>" convention used by the router.
messages = [
    SystemMessage(content="You are a helpful research assistant. Answer questions concisely and accurately."),
    UserMessage(content="What are the three most important factors when evaluating an LLM for production use?"),
]
response = client.complete(model="openai/gpt-4o", messages=messages)

print(response.choices[0].message.content)

Agent with Function Tools

ChatCompletionsClient supports multi-turn tool calling. The agent loop runs until no more tool calls are returned:
Python
import json
import os

from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import (
    AssistantMessage,
    ChatCompletionsToolDefinition,
    FunctionDefinition,
    SystemMessage,
    ToolMessage,
    UserMessage,
)
from azure.core.credentials import AzureKeyCredential

# Route every completion in the agent loop through Orq.ai's AI Router.
client = ChatCompletionsClient(
    endpoint="https://api.orq.ai/v2/router",
    credential=AzureKeyCredential(os.environ["ORQ_API_KEY"]),
)

# JSON Schema for the single argument the model must supply to get_weather.
_WEATHER_PARAMETERS = {
    "type": "object",
    "properties": {
        "location": {
            "type": "string",
            "description": "The city and country, e.g. Amsterdam, NL",
        }
    },
    "required": ["location"],
}

# Tool definitions advertised to the model on every completion call.
tools = [
    ChatCompletionsToolDefinition(
        function=FunctionDefinition(
            name="get_weather",
            description="Get the current weather for a given location.",
            parameters=_WEATHER_PARAMETERS,
        )
    )
]


def get_weather(location: str) -> str:
    """Return a canned weather report for *location* (matched case-insensitively)."""
    reports = {
        "amsterdam, nl": "Cloudy, 14°C",
        "london, uk": "Rainy, 11°C",
        "san francisco, us": "Sunny, 18°C",
    }
    key = location.lower()
    if key in reports:
        return reports[key]
    # Fall back to an explicit "not found" message rather than raising.
    return f"No weather data for {location}"


messages = [
    SystemMessage(content="You are a weather assistant. Always use get_weather to look up weather."),
    UserMessage(content="What's the weather in Amsterdam and London?"),
]

# Agent loop — keep calling the model until it stops requesting tools.
while True:
    response = client.complete(
        model="openai/gpt-4o",
        messages=messages,
        tools=tools,
    )

    choice = response.choices[0]

    if choice.finish_reason != "tool_calls":
        # Final answer: the model produced text instead of tool requests.
        print(choice.message.content)
        break

    # Echo the assistant's tool requests back into the transcript, then
    # append one ToolMessage per call so the model can read the results.
    messages.append(AssistantMessage(tool_calls=choice.message.tool_calls))
    for tool_call in choice.message.tool_calls:
        args = json.loads(tool_call.function.arguments)
        result = get_weather(args["location"])
        messages.append(ToolMessage(tool_call_id=tool_call.id, content=result))

Model Selection

Switch models by changing the model parameter. All 250+ models are available through the same client:
Python
import os

from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import SystemMessage, UserMessage
from azure.core.credentials import AzureKeyCredential

client = ChatCompletionsClient(
    endpoint="https://api.orq.ai/v2/router",
    credential=AzureKeyCredential(os.environ["ORQ_API_KEY"]),
)

messages = [
    SystemMessage(content="You are a helpful assistant."),
    UserMessage(content="Explain transformer architectures briefly."),
]

# Each call below is an independent, billed completion, so print every
# answer. (Previously the first two responses were overwritten unread.)

# Use Claude
response = client.complete(model="anthropic/claude-sonnet-4-6", messages=messages)
print(response.choices[0].message.content)

# Use Gemini
response = client.complete(model="google/gemini-2.5-flash", messages=messages)
print(response.choices[0].message.content)

# Use GPT-4o
response = client.complete(model="openai/gpt-4o", messages=messages)
print(response.choices[0].message.content)

Observability

Overview

Instrument your Azure AI Agents application with OpenTelemetry to send traces to Orq.ai. The azure-core-tracing-opentelemetry package hooks into the Azure SDK’s distributed tracing mechanism, automatically capturing spans for every agent call, thread operation, and LLM invocation.

Prerequisites

  • An Orq.ai account and API Key
  • Azure AI Foundry project with an agent deployed
  • Python 3.9+
  • AZURE_AI_PROJECT_ENDPOINT — your Azure AI Foundry project endpoint
  • AZURE_AI_MODEL_DEPLOYMENT_NAME — the model deployment name in your Foundry project

Install Dependencies

pip install azure-ai-agents azure-identity \
            "azure-core-tracing-opentelemetry>=1.0.0b12" \
            opentelemetry-sdk opentelemetry-exporter-otlp

Configuration

Python
import os

from azure.core.settings import settings
from azure.core.tracing.ext.opentelemetry_span import OpenTelemetrySpan
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

# Tell the Azure SDK to emit its distributed-tracing spans via OpenTelemetry.
settings.tracing_implementation = OpenTelemetrySpan

# Export batched spans to Orq.ai's OTLP/HTTP collector, authenticated
# with your Orq API key.
orq_exporter = OTLPSpanExporter(
    endpoint="https://api.orq.ai/v2/otel/v1/traces",
    headers={"Authorization": f"Bearer {os.environ['ORQ_API_KEY']}"},
)
tracer_provider = TracerProvider()
tracer_provider.add_span_processor(BatchSpanProcessor(orq_exporter))
trace.set_tracer_provider(tracer_provider)
tracer = trace.get_tracer(__name__)

Basic Example

Python
import os

from azure.ai.agents import AgentsClient
from azure.core.settings import settings
from azure.core.tracing.ext.opentelemetry_span import OpenTelemetrySpan
from azure.identity import DefaultAzureCredential
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

# Route Azure SDK spans through OpenTelemetry and ship them to Orq.ai.
settings.tracing_implementation = OpenTelemetrySpan

orq_exporter = OTLPSpanExporter(
    endpoint="https://api.orq.ai/v2/otel/v1/traces",
    headers={"Authorization": f"Bearer {os.environ['ORQ_API_KEY']}"},
)
tracer_provider = TracerProvider()
tracer_provider.add_span_processor(BatchSpanProcessor(orq_exporter))
trace.set_tracer_provider(tracer_provider)
tracer = trace.get_tracer(__name__)

with AgentsClient(
    endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"],
    credential=DefaultAzureCredential(),
) as agents_client:
    # One root span groups the whole agent workflow in the trace view.
    with tracer.start_as_current_span("azure-agent-workflow"):
        agent = agents_client.create_agent(
            model=os.environ["AZURE_AI_MODEL_DEPLOYMENT_NAME"],
            name="Research Assistant",
            instructions="You are a helpful research assistant.",
        )
        thread = agents_client.threads.create()
        agents_client.messages.create(
            thread_id=thread.id,
            role="user",
            content="Summarize the key advantages of transformer architectures.",
        )
        run = agents_client.runs.create_and_process(thread_id=thread.id, agent_id=agent.id)
        print(f"Run status: {run.status}")
        # Clean up the demo agent so repeated runs don't accumulate agents.
        agents_client.delete_agent(agent.id)

# Ship any spans still buffered in the batch processor before the process exits.
tracer_provider.force_flush()

View Traces

View your traces in the Traces tab of your Orq.ai AI Studio, where real-time analytics for every run are also available.