Using Datasets via the API
Introduction
orq.ai exposes APIs to manipulate Datasets. These APIs are used to manage and enrich Datasets programmatically. In this page we'll see the common use cases for creating, enriching and fetching datasets through the API.
Prerequisite
To get started, you need an API key to use with the SDKs or the HTTP API.
To get an API key ready, see Authentication.
SDKs
Creating a Dataset
To create a Dataset, we'll use the Create Dataset API.
The following information is required to create a Dataset:
- a unique name.
- the path within the orq.ai workspace (see Projects).
# Create a Dataset. Replace ORQ_API_KEY with your own key (see Authentication).
curl --request POST \
  --url https://api.orq.ai/v2/datasets \
  --header 'accept: application/json' \
  --header 'authorization: Bearer ORQ_API_KEY' \
  --header 'content-type: application/json' \
  --data '
{
  "display_name": "MyDataset",
  "path": "Default"
}
'
# Create a Dataset via the orq.ai Python SDK.
from orq_ai_sdk import Orq
import os

# The client reads the API key from the ORQ_API_KEY environment variable;
# the `with` block closes the client when done.
client = Orq(api_key=os.getenv("ORQ_API_KEY", ""))

with client as orq:
    payload = {
        "display_name": "bad_reviewed_logs",
        "path": "Default",
    }
    dataset = orq.datasets.create(request=payload)
    assert dataset is not None
    # Handle response
    print(dataset)
import { Orq } from "@orq-ai/node";

const orq = new Orq({
  apiKey: process.env["ORQ_API_KEY"] ?? "",
});

// Create a Dataset; the response includes the `_id` used by later calls.
async function run() {
  const result = await orq.datasets.create({
    displayName: "bad_reviewed_logs",
    path: "Default",
  });
  console.log(result);
}

// Surface rejections instead of leaving the promise floating
// (an unhandled rejection terminates modern Node processes).
run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
The API responds with the following payload:
The API call will return a dataset_id that is used in the next calls.
{
"display_name": "MyDataset",
"path": "Default",
"_id": "<dataset_id>",
"workspace_id": "<workspace_id>",
"created": "2025-06-05T13:16:24.865Z",
"updated": "2025-06-05T13:16:24.865Z",
"created_by_id": null,
"updated_by_id": null,
"project_id": "<project_id>",
"metadata": {
"total_versions": 0,
"datapoints_count": 0
}
}
Adding Datapoints to a Dataset
Datapoints are entries in a Dataset. You can add between 1 and 5,000 datapoints in a single API request.
To create datapoints, we'll use the Create Datapoints API
The expected payload contains:
- The previously acquired dataset ID
- An array of datapoints, where each contains:
- Inputs – Variables that can be used in the prompt message, e.g.,
{{firstname}}
- Messages – The prompt template, structured with system, user, and assistant roles
- Expected Output – Reference responses that evaluators use to compare against newly generated outputs
# Add three datapoints to an existing Dataset.
# Replace DATASET_ID with the `_id` returned by the Create Dataset call.
curl --request POST \
  --url https://api.orq.ai/v2/datasets/DATASET_ID/datapoints \
  --header 'accept: application/json' \
  --header 'authorization: Bearer ORQ_API_KEY' \
  --header 'content-type: application/json' \
  --data '[
  {
    "inputs": {"country": "France"},
    "messages": [
      {"role": "user", "content": "Capital of {{country}}?"},
      {"role": "assistant", "content": "Paris"}
    ],
    "expected_output": "Paris"
  },
  {
    "inputs": {"country": "Germany"},
    "messages": [
      {"role": "user", "content": "Capital of {{country}}?"},
      {"role": "assistant", "content": "Berlin"}
    ],
    "expected_output": "Berlin"
  },
  {
    "inputs": {"country": "Spain"},
    "messages": [
      {"role": "user", "content": "Capital of {{country}}?"},
      {"role": "assistant", "content": "Madrid"}
    ],
    "expected_output": "Madrid"
  }
]'
# Add three datapoints to an existing Dataset via the orq.ai Python SDK.
from orq_ai_sdk import Orq
import os

with Orq(
    # API key from the environment; see the Authentication page.
    api_key=os.getenv("ORQ_API_KEY", ""),
) as orq:
    res = orq.datasets.create_datapoints(
        # Replace with the dataset `_id` returned by the Create Dataset call.
        dataset_id="DATASET_ID",
        # Each datapoint pairs template inputs with messages and a
        # reference answer for evaluators to compare against.
        request_body=[
            {
                "inputs": {"country": "France"},
                "messages": [
                    {"role": "user", "content": "Capital of {{country}}?"},
                    {"role": "assistant", "content": "Paris"}
                ],
                "expected_output": "Paris"
            },
            {
                "inputs": {"country": "Germany"},
                "messages": [
                    {"role": "user", "content": "Capital of {{country}}?"},
                    {"role": "assistant", "content": "Berlin"}
                ],
                "expected_output": "Berlin"
            },
            {
                "inputs": {"country": "Spain"},
                "messages": [
                    {"role": "user", "content": "Capital of {{country}}?"},
                    {"role": "assistant", "content": "Madrid"}
                ],
                "expected_output": "Madrid"
            }
        ]
    )
    print(f"Created {len(res)} datapoints")
import { Orq } from "@orq-ai/node";

const orq = new Orq({
  apiKey: process.env["ORQ_API_KEY"] ?? "",
});

// Each entry pairs a template input with its reference answer.
const capitals = [
  ["France", "Paris"],
  ["Germany", "Berlin"],
  ["Spain", "Madrid"],
];

// Build one datapoint per (country, capital) pair.
const datapoints = capitals.map(([country, capital]) => ({
  inputs: { country },
  messages: [
    { role: "user", content: "Capital of {{country}}?" },
    { role: "assistant", content: capital },
  ],
  expectedOutput: capital,
}));

await orq.datasets.createDatapoints({
  datasetId: "DATASET_ID",
  datapoints,
});
Batch Limits
- Minimum: 1 datapoint per request
- Maximum: 5,000 datapoints per request
- Requests with more than 500 datapoints are automatically processed in optimized chunks
Large Batch Example
For datasets with many entries, you can programmatically generate and submit datapoints:
# Generate and upload a large batch of datapoints in a single request.
from orq_ai_sdk import Orq
import os

with Orq(
    api_key=os.getenv("ORQ_API_KEY", ""),
) as orq:
    # Generate 1000 datapoints programmatically
    datapoints = []
    for i in range(1000):
        datapoints.append({
            "inputs": {
                "number": i,
                "operation": "square"
            },
            "messages": [
                {"role": "user", "content": f"What is {i} squared?"},
                {"role": "assistant", "content": f"{i} squared is {i**2}"}
            ],
            "expected_output": str(i**2)
        })
    # Create all datapoints in one request.
    # NOTE: the keyword is `request_body`, matching the Create Datapoints
    # example above; `datapoints=` is not the parameter name this SDK
    # method accepts.
    res = orq.datasets.create_datapoints(
        dataset_id="DATASET_ID",
        request_body=datapoints
    )
    print(f"Created {len(res)} datapoints")
Listing Datasets
List Datasets using the List Datasets API.
# List all Datasets in the workspace.
curl --request GET \
  --url https://api.orq.ai/v2/datasets \
  --header 'accept: application/json' \
  --header 'authorization: Bearer ORQ_API_KEY'
# List Datasets in the workspace via the orq.ai Python SDK.
from orq_ai_sdk import Orq
import os

client = Orq(api_key=os.getenv("ORQ_API_KEY", ""))

with client as orq:
    # Fetch at most 10 datasets.
    page = orq.datasets.list(limit=10)
    assert page is not None
    # Handle response
    print(page)
import { Orq } from "@orq-ai/node";

const orq = new Orq({
  apiKey: process.env["ORQ_API_KEY"] ?? "",
});

// List Datasets in the workspace (default pagination).
async function run() {
  const result = await orq.datasets.list({});
  console.log(result);
}

// Surface rejections instead of leaving the promise floating
// (an unhandled rejection terminates modern Node processes).
run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
The following response is sent by the API:
{
"object": "list",
"data": [
{
"_id": "<dataset_id>",
"created": "2024-10-04T05:21:16.992Z",
"created_by_id": "<user_id>",
"display_name": "demo-collection",
"metadata": {
"total_versions": 0,
"datapoints_count": 0
},
"parent_id": null,
"project_id": "<project_id>",
"updated": "2024-10-04T05:21:16.992Z",
"updated_by_id": "<user_id>",
"version": null,
"workspace_id": "<workspace_id>"
}
]
}
Fetching a Dataset
Fetch a Dataset using the Retrieve a Dataset API.
Replace DATASET_ID here with a previously acquired dataset ID.
# Retrieve a single Dataset. Replace DATASET_ID with a dataset `_id`.
curl --request GET \
  --url https://api.orq.ai/v2/datasets/DATASET_ID \
  --header 'accept: application/json' \
  --header 'authorization: Bearer ORQ_API_KEY'
# Fetch a single Dataset by its `_id` via the orq.ai Python SDK.
from orq_ai_sdk import Orq
import os

with Orq(api_key=os.getenv("ORQ_API_KEY", "")) as orq:
    dataset = orq.datasets.retrieve(dataset_id="<id>")
    assert dataset is not None
    # Handle response
    print(dataset)
import { Orq } from "@orq-ai/node";

const orq = new Orq({
  apiKey: process.env["ORQ_API_KEY"] ?? "",
});

// Retrieve a single Dataset by its `_id`.
async function run() {
  const result = await orq.datasets.retrieve({
    datasetId: "<id>",
  });
  console.log(result);
}

// Surface rejections instead of leaving the promise floating
// (an unhandled rejection terminates modern Node processes).
run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
The following response is sent by the API.
{
"_id": "<dataset_id>",
"display_name": "MyDataset",
"path": "Default",
"workspace_id": "<workspace_id>",
"created": "2025-06-05T13:16:24.865Z",
"updated": "2025-06-05T13:16:24.865Z",
"created_by_id": null,
"updated_by_id": null,
"project_id": "<project_id>",
"metadata": {
"total_versions": 0,
"datapoints_count": 4
}
}
Once a Dataset is created and populated with Datapoints, it can be used in an Experiment. To learn more, see Creating an Experiment.
Updated 3 days ago