Evals
All Evals
Get all Evaluators
from orq_ai_sdk import Orq
import os

with Orq(
    api_key=os.getenv("ORQ_API_KEY", ""),
) as orq:
    res = orq.evals.all(limit=10)
    # Handle response
    print(res)
Show Parameters
Show Parameters
{
"limit": Optional[int],
"starting_after": Optional[str],
"ending_before": Optional[str],
"search": Optional[str],
"sort": Optional[Literal["asc", "desc"]],
}
Show Response
Show Response
{
"object": Literal["list"],
"data": Union[DataLLM, DataJSON, DataHTTP, DataPython, DataFunction, DataRagas, DataTypescript],
"has_more": bool,
}
Create an Eval
Create an Evaluator
from orq_ai_sdk import Orq
import os

with Orq(
    api_key=os.getenv("ORQ_API_KEY", ""),
) as orq:
    res = orq.evals.create(request={
        "code": "<value>",
        "type": "python_eval",
        "path": "Default",
        "description": "",
        "key": "<key>",
    })
    # Handle response
    print(res)
Show Parameters
Show Parameters
{
"guardrail_config": Union[CreateEvalGuardrailConfigBoolean, CreateEvalGuardrailConfigCategorical, CreateEvalGuardrailConfigNumber],
"output_type": Optional[Literal["boolean", "categorical", "number", "string"]],
"type": Literal["llm_eval"], # required
"repetitions": OptionalNullable[int],
"prompt": str, # required
"categories": List[str],
"categorical_labels": { # optional
"value": str, # required
"description": Optional[str],
},
"dataset_id": Optional[str],
"path": str, # required
"description": Optional[str],
"key": str, # required
"mode": Literal["single"], # required
"model": str, # required
}
Show Response
Show Response
{
"id": str,
"description": str,
"created": Optional[str],
"updated": Optional[str],
"updated_by_id": OptionalNullable[str],
"guardrail_config": Union[CreateEvalGuardrailConfigEvalsResponse200ApplicationJSONResponseBody1LLMBoolean, CreateEvalGuardrailConfigEvalsResponse200ApplicationJSONResponseBody1LLMCategorical, CreateEvalGuardrailConfigEvalsResponse200ApplicationJSONResponseBody1LLMNumber],
"type": Literal["llm_eval"],
"repetitions": OptionalNullable[int],
"prompt": str,
"categories": List[str],
"categorical_labels": { # optional
"value": str,
"description": Optional[str],
},
"dataset_id": Optional[str],
"key": str,
"mode": Literal["single"],
"model": str,
}
Update an Eval
Update an Evaluator
from orq_ai_sdk import Orq
import os

with Orq(
    api_key=os.getenv("ORQ_API_KEY", ""),
) as orq:
    res = orq.evals.update(id="<id>", path="Default")
    # Handle response
    print(res)
Show Parameters
Show Parameters
{
"id": str, # required
"type": Optional[str],
"path": Optional[str],
"key": Optional[str],
"description": Optional[str],
"prompt": Optional[str],
"output_type": Optional[str],
"categories": List[str],
"categorical_labels": { # optional
"value": str, # required
"description": Optional[str],
},
"repetitions": Optional[float],
"mode": Optional[Literal["single", "jury"]],
"model": Optional[str],
"jury": { # optional
"judges": { # required
"model": str, # required
"retry": { # optional
"count": Optional[int],
"on_codes": List[int],
},
"fallbacks": { # optional
"model": str, # required
},
},
"replacement_judges": { # optional
"model": str, # required
"retry": { # optional
"count": Optional[int],
"on_codes": List[int],
},
"fallbacks": { # optional
"model": str, # required
},
},
"min_successful_judges": Optional[int],
"tie_value": Optional[Literal["Tie"]],
},
"schema_": Optional[str],
"url": Optional[str],
"method": Optional[str],
"headers": Dict[str, str],
"payload": Dict[str, Any],
"code": Optional[str],
"guardrail_config": Union[Boolean, Categorical, Number],
"version_increment": Optional[Literal["major", "minor", "patch"]],
"version_description": Optional[str],
}
Show Response
Show Response
{
"id": str,
"description": str,
"created": Optional[str],
"updated": Optional[str],
"updated_by_id": OptionalNullable[str],
"guardrail_config": Union[UpdateEvalGuardrailConfigEvalsResponse200ApplicationJSONResponseBody1Boolean, UpdateEvalGuardrailConfigEvalsResponse200ApplicationJSONResponseBody1Categorical, UpdateEvalGuardrailConfigEvalsResponse200ApplicationJSONResponseBody1Number],
"type": Literal["llm_eval"],
"repetitions": OptionalNullable[int],
"prompt": str,
"categories": List[str],
"categorical_labels": { # optional
"value": str,
"description": Optional[str],
},
"dataset_id": Optional[str],
"key": str,
"mode": Literal["single"],
"model": str,
}
Delete an Eval
Delete an Evaluator
from orq_ai_sdk import Orq
import os

with Orq(
    api_key=os.getenv("ORQ_API_KEY", ""),
) as orq:
    orq.evals.delete(id="<id>")
    # Use the SDK ...
Show Parameters
Show Parameters
{
"id": str, # required
}
Invoke an Eval
Invoke a Custom Evaluator
from orq_ai_sdk import Orq
import os

with Orq(
    api_key=os.getenv("ORQ_API_KEY", ""),
) as orq:
    res = orq.evals.invoke(id="<id>", messages=[
        {
            "role": "tool",
            "content": [],
        },
    ])
    # Handle response
    print(res)
Show Parameters
Show Parameters
{
"id": str, # required
"query": Optional[str],
"output": Optional[str],
"reference": Optional[str],
"retrievals": List[str],
"messages": { # optional
"role": Literal["system", "developer", "assistant", "user", "exception", "tool", "prompt", "correction", "expected_output"], # required
"content": Union[str, List[Two]], # required
"tool_calls": { # optional
"id": Optional[str],
"index": Optional[float],
"type": Literal["function"], # required
"function": { # required
"name": str, # required
"arguments": str, # required
},
},
"tool_call_id": OptionalNullable[str],
},
"model": Optional[str],
}
Show Response
Show Response
{
"type": Literal["string"],
"original_value": OptionalNullable[str],
"value": OptionalNullable[str],
}
Evaluators Versions
Returns version history for a specific evaluator
from orq_ai_sdk import Orq
import os

with Orq(
    api_key=os.getenv("ORQ_API_KEY", ""),
) as orq:
    res = orq.evals.list_versions(id="<id>", limit=10)
    # Handle response
    print(res)
Show Parameters
Show Parameters
{
"id": str, # required
"limit": Optional[int],
"starting_after": Optional[str],
"ending_before": Optional[str],
}
Show Response
Show Response
{
"object": Literal["list"],
"data": {
"id": str,
"created_at": str,
"updated_at": str,
"created_by_id": Optional[str],
"updated_by_id": Optional[str],
"version": str,
"description": Optional[str],
"checksum": str,
"entity_type": str,
"entity_id": str,
"data": Dict[str, Any],
"workspace_id": str,
},
"has_more": bool,
}