Skip to main content

AI Router

Route your LLM calls through the AI Router with a single endpoint change. Zero vendor lock-in: always run on the best model at the lowest cost for your use case.

Observability

Instrument your code with OpenTelemetry to capture traces, logs, and metrics for every LLM call, agent step, and tool use.

AI Router

Overview

Use the Azure AI Inference SDK to route all model calls through Orq.ai’s AI Router. Point ChatCompletionsClient at Orq’s endpoint to access 250+ models from 20+ providers — OpenAI, Anthropic, Google, and more — without changing your agent logic.

Key Benefits

Complete Observability

Track every agent step, tool use, and LLM call with detailed traces and analytics

Built-in Reliability

Automatic fallbacks, retries, and load balancing for production resilience

Cost Optimization

Real-time cost tracking and spend management across all your AI operations

Multi-Provider Access

Access 250+ LLMs and 20+ providers through a single, unified integration

Prerequisites

  • An Orq.ai account and API Key
  • Python 3.9 or higher
To set up your API key, see API keys & Endpoints.

Installation

pip install azure-ai-inference azure-core

Configuration

Configure ChatCompletionsClient to point at Orq.ai’s AI Router:
Python
import os

from azure.ai.inference import ChatCompletionsClient
from azure.core.credentials import AzureKeyCredential

# Orq.ai's AI Router speaks the Azure AI Inference protocol, so the stock
# ChatCompletionsClient works unchanged -- only the endpoint differs.
ORQ_ROUTER_ENDPOINT = "https://api.orq.ai/v2/router"

client = ChatCompletionsClient(
    endpoint=ORQ_ROUTER_ENDPOINT,
    credential=AzureKeyCredential(os.environ["ORQ_API_KEY"]),
)
The AI Router endpoint is `https://api.orq.ai/v2/router`.

Basic Example

Python
import os

from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import SystemMessage, UserMessage
from azure.core.credentials import AzureKeyCredential

# Client pointed at Orq.ai's AI Router instead of an Azure-hosted endpoint.
client = ChatCompletionsClient(
    endpoint="https://api.orq.ai/v2/router",
    credential=AzureKeyCredential(os.environ["ORQ_API_KEY"]),
)

# Minimal single-turn chat: one system prompt plus one user question.
conversation = [
    SystemMessage(content="You are a helpful research assistant. Answer questions concisely and accurately."),
    UserMessage(content="What are the three most important factors when evaluating an LLM for production use?"),
]

response = client.complete(model="openai/gpt-4o", messages=conversation)

print(response.choices[0].message.content)

Agent with Function Tools

ChatCompletionsClient supports multi-turn tool calling. The agent loop runs until no more tool calls are returned:
Python
import json
import os

from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import (
    AssistantMessage,
    ChatCompletionsToolDefinition,
    FunctionDefinition,
    SystemMessage,
    ToolMessage,
    UserMessage,
)
from azure.core.credentials import AzureKeyCredential

# All calls are routed through Orq.ai's AI Router.
client = ChatCompletionsClient(
    endpoint="https://api.orq.ai/v2/router",
    credential=AzureKeyCredential(os.environ["ORQ_API_KEY"]),
)

# JSON-schema parameter description for the single tool the model may call.
_weather_parameters = {
    "type": "object",
    "properties": {
        "location": {
            "type": "string",
            "description": "The city and country, e.g. Amsterdam, NL",
        }
    },
    "required": ["location"],
}

tools = [
    ChatCompletionsToolDefinition(
        function=FunctionDefinition(
            name="get_weather",
            description="Get the current weather for a given location.",
            parameters=_weather_parameters,
        )
    )
]


def get_weather(location: str) -> str:
    """Return a canned weather report for *location* (case-insensitive).

    Stands in for a real weather API in this example; unknown locations
    yield a human-readable fallback string instead of raising.
    """
    reports = {
        "amsterdam, nl": "Cloudy, 14°C",
        "london, uk": "Rainy, 11°C",
        "san francisco, us": "Sunny, 18°C",
    }
    key = location.lower()
    if key in reports:
        return reports[key]
    return f"No weather data for {location}"


messages = [
    SystemMessage(content="You are a weather assistant. Always use get_weather to look up weather."),
    UserMessage(content="What's the weather in Amsterdam and London?"),
]

# Agent loop: keep calling the model, executing any requested tools and
# feeding their results back, until the model answers without tool calls.
finished = False
while not finished:
    response = client.complete(model="openai/gpt-4o", messages=messages, tools=tools)
    choice = response.choices[0]

    if choice.finish_reason != "tool_calls":
        # Final answer -- the model requested no further tools.
        print(choice.message.content)
        finished = True
    else:
        # Record the assistant's tool-call turn, then answer each call
        # with a ToolMessage carrying the tool's result.
        messages.append(AssistantMessage(tool_calls=choice.message.tool_calls))
        for call in choice.message.tool_calls:
            arguments = json.loads(call.function.arguments)
            result = get_weather(arguments["location"])
            messages.append(ToolMessage(tool_call_id=call.id, content=result))

Model Selection

Switch models by changing the model parameter. All 250+ models are available through the same client:
Python
import os

from azure.ai.inference import ChatCompletionsClient
from azure.ai.inference.models import SystemMessage, UserMessage
from azure.core.credentials import AzureKeyCredential

client = ChatCompletionsClient(
    endpoint="https://api.orq.ai/v2/router",
    credential=AzureKeyCredential(os.environ["ORQ_API_KEY"]),
)

messages = [
    SystemMessage(content="You are a helpful assistant."),
    UserMessage(content="Explain transformer architectures briefly."),
]

# The same client reaches every provider; only the model identifier changes.
# Claude, then Gemini, then GPT-4o -- `response` ends up holding the last reply.
for model_id in (
    "anthropic/claude-sonnet-4-6",
    "google/gemini-2.5-flash",
    "openai/gpt-4o",
):
    response = client.complete(model=model_id, messages=messages)

print(response.choices[0].message.content)

Observability

Overview

Instrument your Azure AI Agents application with OpenTelemetry to send traces to Orq.ai. The azure-core-tracing-opentelemetry package hooks into the Azure SDK’s distributed tracing mechanism, automatically capturing spans for every agent call, thread operation, and LLM invocation.

Prerequisites

  • An Orq.ai account and API Key
  • Azure AI Foundry project with an agent deployed
  • Python 3.9+
  • AZURE_AI_PROJECT_ENDPOINT — your Azure AI Foundry project endpoint
  • AZURE_AI_MODEL_DEPLOYMENT_NAME — the model deployment name in your Foundry project

Install Dependencies

# NOTE: the version-constrained requirement must be quoted -- an unquoted ">"
# is shell output redirection, which would create a file named "=1.0.0b12"
# and install the package without any version constraint.
pip install azure-ai-agents azure-identity \
            "azure-core-tracing-opentelemetry>=1.0.0b12" \
            opentelemetry-sdk opentelemetry-exporter-otlp

Configuration

Python
import os

from azure.core.settings import settings
from azure.core.tracing.ext.opentelemetry_span import OpenTelemetrySpan
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

# Tell the Azure SDK to emit OpenTelemetry spans for every client operation.
settings.tracing_implementation = OpenTelemetrySpan

# Export spans to Orq.ai's OTLP/HTTP collector, authenticated via the API key.
exporter = OTLPSpanExporter(
    endpoint="https://api.orq.ai/v2/otel/v1/traces",
    headers={"Authorization": f"Bearer {os.environ['ORQ_API_KEY']}"},
)

tracer_provider = TracerProvider()
tracer_provider.add_span_processor(BatchSpanProcessor(exporter))
trace.set_tracer_provider(tracer_provider)
tracer = trace.get_tracer(__name__)

Basic Example

Python
import os

from azure.ai.agents import AgentsClient
from azure.core.settings import settings
from azure.core.tracing.ext.opentelemetry_span import OpenTelemetrySpan
from azure.identity import DefaultAzureCredential
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

# Route Azure SDK distributed tracing through OpenTelemetry.
settings.tracing_implementation = OpenTelemetrySpan

# Send all spans to Orq.ai's OTLP/HTTP endpoint.
exporter = OTLPSpanExporter(
    endpoint="https://api.orq.ai/v2/otel/v1/traces",
    headers={"Authorization": f"Bearer {os.environ['ORQ_API_KEY']}"},
)
tracer_provider = TracerProvider()
tracer_provider.add_span_processor(BatchSpanProcessor(exporter))
trace.set_tracer_provider(tracer_provider)
tracer = trace.get_tracer(__name__)

with AgentsClient(
    endpoint=os.environ["AZURE_AI_PROJECT_ENDPOINT"],
    credential=DefaultAzureCredential(),
) as agents_client:
    # One root span groups the whole create/ask/run/cleanup workflow in the trace view.
    with tracer.start_as_current_span("azure-agent-workflow"):
        agent = agents_client.create_agent(
            model=os.environ["AZURE_AI_MODEL_DEPLOYMENT_NAME"],
            name="Research Assistant",
            instructions="You are a helpful research assistant.",
        )
        thread = agents_client.threads.create()
        agents_client.messages.create(
            thread_id=thread.id,
            role="user",
            content="Summarize the key advantages of transformer architectures.",
        )
        run = agents_client.runs.create_and_process(thread_id=thread.id, agent_id=agent.id)
        print(f"Run status: {run.status}")
        agents_client.delete_agent(agent.id)

# Flush buffered spans before the process exits so no trace data is dropped.
tracer_provider.force_flush()

View Traces

View your traces in the Traces tab of the AI Studio.
Visit your AI Studio to view real-time analytics and traces.