Evals SDK Reference - Orq.ai Documentation

Evals

All Evals

Get all Evaluators

from orq_ai_sdk import Orq
import os

with Orq(
    api_key=os.getenv("ORQ_API_KEY", ""),
) as orq:

    res = orq.evals.all(limit=10)

    # Handle response
    print(res)

import { Orq } from "@orq-ai/node";

const orq = new Orq({
  apiKey: process.env["ORQ_API_KEY"] ?? "",
});

async function run() {
  const result = await orq.evals.all({});

  console.log(result);
}

run();

Show Parameters

{
    "limit": Optional[int],
    "starting_after": Optional[str],
    "ending_before": Optional[str],
    "search": Optional[str],
    "sort": Optional[Literal["asc", "desc"]],
    "project_id": Optional[str],
}

{
  limit?: number;
  startingAfter?: string;
  endingBefore?: string;
  search?: string;
  sort?: "asc" | "desc";
  projectId?: string;
}

Show Response

{
    "object": Literal["list"],
    "data": Union[EvaluatorResponseLlm, EvaluatorResponseJSONSchema, EvaluatorResponseHTTP, EvaluatorResponsePython, EvaluatorResponseFunction, EvaluatorResponseRagas, EvaluatorResponseTypescript],
    "has_more": bool,
}

{
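  object: "list";
  data: Array<EvaluatorResponseLlm | EvaluatorResponseJSONSchema | EvaluatorResponseHTTP | EvaluatorResponsePython | EvaluatorResponseFunction | EvaluatorResponseRagas | EvaluatorResponseTypescript>;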
  hasMore: boolean;
}

Create an Eval

Create an Evaluator

from orq_ai_sdk import Orq
import os

with Orq(
    api_key=os.getenv("ORQ_API_KEY", ""),
) as orq:

    res = orq.evals.create(request={
        "code": "<value>",
        "type": "python_eval",
        "path": "Default",
        "description": "",
        "key": "<key>",
    })

    # Handle response
    print(res)

import { Orq } from "@orq-ai/node";

const orq = new Orq({
  apiKey: process.env["ORQ_API_KEY"] ?? "",
});

async function run() {
  const result = await orq.evals.create({
    code: "<value>",
    type: "python_eval",
    path: "Default",
    description: "",
    key: "<key>",
  });

  console.log(result);
}

run();

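Show Parameters

Note: the schemas below describe the `llm_eval` evaluator type. The example above creates a `python_eval` evaluator, which is configured with a `code` field that is not part of the `llm_eval` schema.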

{
    "guardrail_config": Optional[Any],
    "output_type": Optional[Literal["boolean", "categorical", "number", "string"]],
    "type": Literal["llm_eval"],  # required
    "repetitions": OptionalNullable[int],
    "prompt": str,  # required
    "categories": List[str],
    "categorical_labels": {  # optional
        "value": str,  # required
        "description": Optional[str],
    },
    "dataset_id": Optional[str],
    "path": str,  # required
    "description": Optional[str],
    "key": str,  # required
    "mode": Literal["single"],  # required
    "model": str,  # required
}

{
  guardrail_config?: unknown | undefined;
  output_type?: "boolean" | "categorical" | "number" | "string" | undefined;
  type: "llm_eval";  // required
  repetitions?: number | null | undefined;
  prompt: string;  // required
  categories?: string[];
  categorical_labels?: {
    value: string;  // required
    description?: string | undefined;
  };
  dataset_id?: string | undefined;
  path: string;  // required
  description?: string | undefined;
  key: string;  // required
  mode: "single";  // required
  model: string;  // required
}

Show Response

{
    "id": str,
    "description": str,
    "created": Optional[str],
    "updated": Optional[str],
    "updated_by_id": OptionalNullable[str],
    "guardrail_config": Optional[Any],
    "type": Literal["llm_eval"],
    "repetitions": OptionalNullable[int],
    "prompt": str,
    "categories": List[str],
    "categorical_labels": {  # optional
        "value": str,
        "description": Optional[str],
    },
    "dataset_id": Optional[str],
    "key": str,
    "mode": Literal["single", "jury"],
    "model": Optional[str],
    "jury": {  # optional
        "judges": {
            "model": str,
            "retry": {  # optional
                "count": Optional[int],
                "on_codes": List[int],
            },
            "fallbacks": {  # optional
                "model": str,
            },
        },
        "replacement_judges": {  # optional
            "model": str,
            "retry": {  # optional
                "count": Optional[int],
                "on_codes": List[int],
            },
            "fallbacks": {  # optional
                "model": str,
            },
        },
        "min_successful_judges": Optional[int],
        "tie_value": Optional[Literal["Tie"]],
    },
}

{
  id: string;
  description: string;
  created?: string | undefined;
  updated?: string | undefined;
  updated_by_id?: string | null | undefined;
  guardrail_config?: unknown | undefined;
  type: "llm_eval";
  repetitions?: number | null | undefined;
  prompt: string;
  categories?: string[];
  categorical_labels?: {
    value: string;
    description?: string | undefined;
  };
  dataset_id?: string | undefined;
  key: string;
  mode: "single" | "jury";
  model?: string | undefined;
  jury?: {
    judges: {
      model: string;
      retry?: {
        count?: number | undefined;
        on_codes?: number[];
      };
      fallbacks?: {
        model: string;
      };
    };
    replacement_judges?: {
      model: string;
      retry?: {
        count?: number | undefined;
        on_codes?: number[];
      };
      fallbacks?: {
        model: string;
      };
    };
    min_successful_judges?: number | undefined;
    tie_value?: "Tie" | undefined;
  };
}

Update an Eval

Update an Evaluator

from orq_ai_sdk import Orq
import os

with Orq(
    api_key=os.getenv("ORQ_API_KEY", ""),
) as orq:

    res = orq.evals.update(id="<id>", path="Default")

    # Handle response
    print(res)

import { Orq } from "@orq-ai/node";

const orq = new Orq({
  apiKey: process.env["ORQ_API_KEY"] ?? "",
});

async function run() {
  const result = await orq.evals.update({
    id: "<id>",
    requestBody: {
      path: "Default",
    },
  });

  console.log(result);
}

run();

Show Parameters

{
    "id": str,  # required
    "type": Optional[str],
    "path": Optional[str],
    "key": Optional[str],
    "description": Optional[str],
    "prompt": Optional[str],
    "output_type": Optional[str],
    "categories": List[str],
    "categorical_labels": {  # optional
        "value": str,  # required
        "description": Optional[str],
    },
    "repetitions": Optional[float],
    "mode": Optional[Literal["single", "jury"]],
    "model": Optional[str],
    "jury": {  # optional
        "judges": {  # required
            "model": str,  # required
            "retry": {  # optional
                "count": Optional[int],
                "on_codes": List[int],
            },
            "fallbacks": {  # optional
                "model": str,  # required
            },
        },
        "replacement_judges": {  # optional
            "model": str,  # required
            "retry": {  # optional
                "count": Optional[int],
                "on_codes": List[int],
            },
            "fallbacks": {  # optional
                "model": str,  # required
            },
        },
        "min_successful_judges": Optional[int],
        "tie_value": Optional[Literal["Tie"]],
    },
    "schema_": Optional[str],
    "url": Optional[str],
    "method": Optional[str],
    "headers": Dict[str, str],
    "payload": Dict[str, Any],
    "code": Optional[str],
    "guardrail_config": Optional[Any],
    "version_increment": Optional[Literal["major", "minor", "patch"]],
    "version_description": Optional[str],
}

{
  id: string;  // required
  requestBody?: {
    type?: string;
    path?: string;
    key?: string;
    description?: string;
    prompt?: string;
    outputType?: string;
    categories?: string[];
    categoricalLabels?: {
      value: string;  // required
      description?: string;
    };
    repetitions?: number;
    mode?: "single" | "jury";
    model?: string;
    jury?: {
      judges: {  // required
        model: string;  // required
        retry?: {
          count?: number;
          onCodes?: number[];
        };
        fallbacks?: {
          model: string;  // required
        };
      };
      replacementJudges?: {
        model: string;  // required
        retry?: {
          count?: number;
          onCodes?: number[];
        };
        fallbacks?: {
          model: string;  // required
        };
      };
      minSuccessfulJudges?: number;
      tieValue?: "Tie";
    };
    schema?: string;
    url?: string;
    method?: string;
    headers?: Record<string, string>;
    payload?: Record<string, any>;
    code?: string;
    guardrailConfig?: any;
    versionIncrement?: "major" | "minor" | "patch";
    versionDescription?: string;
  };
}

Show Response

{
    "id": str,
    "description": str,
    "created": Optional[str],
    "updated": Optional[str],
    "updated_by_id": OptionalNullable[str],
    "guardrail_config": Optional[Any],
    "type": Literal["llm_eval"],
    "repetitions": OptionalNullable[int],
    "prompt": str,
    "categories": List[str],
    "categorical_labels": {  # optional
        "value": str,
        "description": Optional[str],
    },
    "dataset_id": Optional[str],
    "key": str,
    "mode": Literal["single", "jury"],
    "model": Optional[str],
    "jury": {  # optional
        "judges": {
            "model": str,
            "retry": {  # optional
                "count": Optional[int],
                "on_codes": List[int],
            },
            "fallbacks": {  # optional
                "model": str,
            },
        },
        "replacement_judges": {  # optional
            "model": str,
            "retry": {  # optional
                "count": Optional[int],
                "on_codes": List[int],
            },
            "fallbacks": {  # optional
                "model": str,
            },
        },
        "min_successful_judges": Optional[int],
        "tie_value": Optional[Literal["Tie"]],
    },
}

{
  id: string;
  description: string;
  created?: string | undefined;
  updated?: string | undefined;
  updated_by_id?: string | null | undefined;
  guardrail_config?: unknown | undefined;
  type: "llm_eval";
  repetitions?: number | null | undefined;
  prompt: string;
  categories?: string[];
  categorical_labels?: {
    value: string;
    description?: string | undefined;
  };
  dataset_id?: string | undefined;
  key: string;
  mode: "single" | "jury";
  model?: string | undefined;
  jury?: {
    judges: {
      model: string;
      retry?: {
        count?: number | undefined;
        on_codes?: number[];
      };
      fallbacks?: {
        model: string;
      };
    };
    replacement_judges?: {
      model: string;
      retry?: {
        count?: number | undefined;
        on_codes?: number[];
      };
      fallbacks?: {
        model: string;
      };
    };
    min_successful_judges?: number | undefined;
    tie_value?: "Tie" | undefined;
  };
}

Delete an Eval

Delete an Evaluator

from orq_ai_sdk import Orq
import os

with Orq(
    api_key=os.getenv("ORQ_API_KEY", ""),
) as orq:

    orq.evals.delete(id="<id>")

    # Use the SDK ...

import { Orq } from "@orq-ai/node";

const orq = new Orq({
  apiKey: process.env["ORQ_API_KEY"] ?? "",
});

async function run() {
  await orq.evals.delete({
    id: "<id>",
  });

}

run();

Show Parameters

{
    "id": str,  # required
}

{
  id: string;  // required
}

Invoke an Eval

Invoke a Custom Evaluator

from orq_ai_sdk import Orq
import os

with Orq(
    api_key=os.getenv("ORQ_API_KEY", ""),
) as orq:

    res = orq.evals.invoke(id="<id>", messages=[
        {
            "role": "tool",
            "content": [],
        },
    ])

    # Handle response
    print(res)

import { Orq } from "@orq-ai/node";

const orq = new Orq({
  apiKey: process.env["ORQ_API_KEY"] ?? "",
});

async function run() {
  const result = await orq.evals.invoke({
    id: "<id>",
    requestBody: {
      messages: [
        {
          role: "tool",
          content: [],
        },
      ],
    },
  });

  console.log(result);
}

run();

Show Parameters

{
    "id": str,  # required
    "query": Optional[str],
    "output": Optional[str],
    "reference": Optional[str],
    "retrievals": List[str],
    "messages": {  # optional
        "role": Literal["system", "developer", "assistant", "user", "exception", "tool", "prompt", "correction", "expected_output"],  # required
        "content": Union[str, List[InvokeEvalContent2]],  # required
        "tool_calls": {  # optional
            "id": Optional[str],
            "index": Optional[float],
            "type": Literal["function"],  # required
            "function": {  # required
                "name": str,  # required
                "arguments": str,  # required
            },
        },
        "tool_call_id": OptionalNullable[str],
    },
    "model": Optional[str],
}

{
  id: string;  // required
  requestBody?: {
    query?: string;
    output?: string;
    reference?: string;
    retrievals?: string[];
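    messages?: {
      role: "system" | "developer" | "assistant" | "user" | "exception" | "tool" | "prompt" | "correction" | "expected_output";  // required
      content: string | InvokeEvalContent2[];  // required
      toolCalls?: {
        id?: string;
        index?: number;
        type: "function";  // required
        function: {  // required
          name: string;  // required
          arguments: string;  // required
        };
      };
      toolCallId?: string;
    }[];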
    model?: string;
  };
}

Show Response

{
    "type": Literal["string"],
    "original_value": OptionalNullable[str],
    "value": OptionalNullable[str],
}

{
  type: "string";
  original_value?: string | null | undefined;
  value?: string | null | undefined;
}

Evaluators Versions

Returns version history for a specific evaluator

from orq_ai_sdk import Orq
import os

with Orq(
    api_key=os.getenv("ORQ_API_KEY", ""),
) as orq:

    res = orq.evals.list_versions(id="<id>", limit=10)

    # Handle response
    print(res)

import { Orq } from "@orq-ai/node";

const orq = new Orq({
  apiKey: process.env["ORQ_API_KEY"] ?? "",
});

async function run() {
  const result = await orq.evals.listVersions({
    id: "<id>",
  });

  console.log(result);
}

run();

Show Parameters

{
    "id": str,  # required
    "limit": Optional[int],
    "starting_after": Optional[str],
    "ending_before": Optional[str],
}

{
  id: string;  // required
  limit?: number;
  startingAfter?: string;
  endingBefore?: string;
}

Show Response

{
    "object": Literal["list"],
    "data": {
        "id": str,
        "created_at": str,
        "updated_at": str,
        "created_by_id": Optional[str],
        "updated_by_id": Optional[str],
        "version": str,
        "description": Optional[str],
        "checksum": str,
        "entity_type": str,
        "entity_id": str,
        "data": Dict[str, Any],
        "workspace_id": str,
    },
    "has_more": bool,
}

{
  object: "list";
  data: {
    id: string;
    createdAt: string;
    updatedAt: string;
    createdById?: string;
    updatedById?: string;
    version: string;
    description?: string;
    checksum: string;
    entityType: string;
    entityId: string;
    data: Record<string, any>;
    workspaceId: string;
  }[];
  hasMore: boolean;
}

Evals

All Evals

Create an Eval

Update an Eval

Delete an Eval

Invoke an Eval

Evaluators Versions

Evals

All Evals

Create an Eval

Update an Eval

Delete an Eval

Invoke an Eval

Evaluators Versions