Using Datasets via the API

Introduction

orq.ai exposes an API to manipulate Datasets. These APIs are used to manage and enrich Datasets programmatically. On this page we'll cover the common use cases for creating, enriching, and fetching Datasets through the API.

Prerequisite

To get started, you need an API key to use with the SDKs or the HTTP API.

📘

To get an API key ready, see Authentication.

SDKs

Creating a Dataset

To create a Dataset, we'll use the Create Dataset API.

The following information is required to create a Dataset:

  • a unique name.
  • the path within the orq.ai workspace (see Projects).
# Create a Dataset named "MyDataset" under the "Default" path.
# Replace ORQ_API_KEY with your workspace API key.
curl --request POST \
     --url https://api.orq.ai/v2/datasets \
     --header 'accept: application/json' \
     --header 'authorization: Bearer ORQ_API_KEY' \
     --header 'content-type: application/json' \
     --data '
{
  "display_name": "MyDataset",
  "path": "Default"
}
'
from orq_ai_sdk import Orq
import os


# The API key is read from the environment; the client is closed
# automatically when the "with" block exits.
with Orq(
    api_key=os.getenv("ORQ_API_KEY", ""),
) as orq:

    # Create a Dataset named "bad_reviewed_logs" under the "Default" path.
    payload = {
        "display_name": "bad_reviewed_logs",
        "path": "Default",
    }
    res = orq.datasets.create(request=payload)

    assert res is not None

    # Handle response
    print(res)
import { Orq } from "@orq-ai/node";

// The API key is read from the environment; defaults to an empty string.
const orq = new Orq({
  apiKey: process.env["ORQ_API_KEY"] ?? "",
});

const run = async () => {
  // Create a Dataset named "bad_reviewed_logs" under the "Default" path.
  const result = await orq.datasets.create({
    displayName: "bad_reviewed_logs",
    path: "Default",
  });

  console.log(result);
};

run();

The API responds with the following payload:

The API call will return a dataset_id that is used in subsequent calls.

{
  "display_name": "MyDataset",
  "path": "Default",
  "_id": "<dataset_id>",
  "workspace_id": "<workspace_id>",
  "created": "2025-06-05T13:16:24.865Z",
  "updated": "2025-06-05T13:16:24.865Z",
  "created_by_id": null,
  "updated_by_id": null,
  "project_id": "<project_id>",
  "metadata": {
    "total_versions": 0,
    "datapoints_count": 0
  }
}

Adding Datapoints to a Dataset

Datapoints are entries in a Dataset. You can add between 1 and 5,000 datapoints in a single API request.

To create datapoints, we'll use the Create Datapoints API.

The expected payload contains:

  • The previously acquired dataset ID
  • An array of datapoints, where each contains:
    • Inputs – Variables that can be used in the prompt message, e.g., {{firstname}}
    • Messages – The prompt template, structured with system, user, and assistant roles
    • Expected Output – Reference responses that evaluators use to compare against newly generated outputs
# Add three datapoints to an existing Dataset in a single request.
# Replace DATASET_ID with the ID returned by the Create Dataset call,
# and ORQ_API_KEY with your workspace API key.
curl --request POST \
       --url https://api.orq.ai/v2/datasets/DATASET_ID/datapoints \
       --header 'accept: application/json' \
       --header 'authorization: Bearer ORQ_API_KEY' \
       --header 'content-type: application/json' \
       --data '[
    {
      "inputs": {"country": "France"},
      "messages": [
        {"role": "user", "content": "Capital of {{country}}?"},
        {"role": "assistant", "content": "Paris"}
      ],
      "expected_output": "Paris"
    },
    {
      "inputs": {"country": "Germany"},
      "messages": [
        {"role": "user", "content": "Capital of {{country}}?"},
        {"role": "assistant", "content": "Berlin"}
      ],
      "expected_output": "Berlin"
    },
    {
      "inputs": {"country": "Spain"},
      "messages": [
        {"role": "user", "content": "Capital of {{country}}?"},
        {"role": "assistant", "content": "Madrid"}
      ],
      "expected_output": "Madrid"
    }
  ]'
from orq_ai_sdk import Orq
import os

# Country/capital pairs used to build the datapoints below.
CAPITALS = [
    ("France", "Paris"),
    ("Germany", "Berlin"),
    ("Spain", "Madrid"),
]

with Orq(
    api_key=os.getenv("ORQ_API_KEY", ""),
) as orq:

    # Each datapoint carries the template inputs, the prompt messages,
    # and the reference answer evaluators compare against.
    res = orq.datasets.create_datapoints(
        dataset_id="DATASET_ID",
        request_body=[
            {
                "inputs": {"country": country},
                "messages": [
                    {"role": "user", "content": "Capital of {{country}}?"},
                    {"role": "assistant", "content": capital}
                ],
                "expected_output": capital
            }
            for country, capital in CAPITALS
        ]
    )

    print(f"Created {len(res)} datapoints")
import { Orq } from "@orq-ai/node";

const orq = new Orq({
  apiKey: process.env["ORQ_API_KEY"] ?? "",
});

// Wrapped in an async function (matching the other samples) instead of a
// bare top-level await, which only works in ES-module contexts.
async function run() {
  // Each datapoint carries the template inputs, the prompt messages,
  // and the reference answer evaluators compare against.
  const result = await orq.datasets.createDatapoints({
    datasetId: "DATASET_ID",
    datapoints: [
      {
        inputs: { country: "France" },
        messages: [
          { role: "user", content: "Capital of {{country}}?" },
          { role: "assistant", content: "Paris" }
        ],
        expectedOutput: "Paris"
      },
      {
        inputs: { country: "Germany" },
        messages: [
          { role: "user", content: "Capital of {{country}}?" },
          { role: "assistant", content: "Berlin" }
        ],
        expectedOutput: "Berlin"
      },
      {
        inputs: { country: "Spain" },
        messages: [
          { role: "user", content: "Capital of {{country}}?" },
          { role: "assistant", content: "Madrid" }
        ],
        expectedOutput: "Madrid"
      }
    ],
  });

  console.log(result);
}

run();

Batch Limits

  • Minimum: 1 datapoint per request
  • Maximum: 5,000 datapoints per request
  • Requests with more than 500 datapoints are automatically processed in optimized chunks

Large Batch Example

For datasets with many entries, you can programmatically generate and submit datapoints:

from orq_ai_sdk import Orq
import os

with Orq(
    api_key=os.getenv("ORQ_API_KEY", ""),
) as orq:

    # Generate 1000 datapoints programmatically.
    datapoints = []
    for i in range(1000):
        datapoints.append({
            "inputs": {
                "number": i,
                "operation": "square"
            },
            "messages": [
                {"role": "user", "content": f"What is {i} squared?"},
                {"role": "assistant", "content": f"{i} squared is {i**2}"}
            ],
            "expected_output": str(i**2)
        })

    # Create all datapoints in one request. NOTE: the keyword argument is
    # `request_body`, the same parameter name used in the three-datapoint
    # example; `datapoints=` is not the Python SDK's parameter name.
    res = orq.datasets.create_datapoints(
        dataset_id="DATASET_ID",
        request_body=datapoints
    )

    print(f"Created {len(res)} datapoints")

Listing Datasets

List Datasets using the List Datasets API.

# List all Datasets in the workspace.
# Replace ORQ_API_KEY with your workspace API key.
curl --request GET \
     --url https://api.orq.ai/v2/datasets \
     --header 'accept: application/json' \
     --header 'authorization: Bearer ORQ_API_KEY'
from orq_ai_sdk import Orq
import os


# List Datasets from the workspace, capped at 10 results per page.
with Orq(
    api_key=os.getenv("ORQ_API_KEY", ""),
) as orq:

    page_size = 10
    res = orq.datasets.list(limit=page_size)

    assert res is not None

    # Handle response
    print(res)
import { Orq } from "@orq-ai/node";

const orq = new Orq({
  apiKey: process.env["ORQ_API_KEY"] ?? "",
});

const run = async () => {
  // Fetch the first page of Datasets using default options.
  const result = await orq.datasets.list({});
  console.log(result);
};

run();

The following response is sent by the API:

{
  "object": "list",
  "data": [
    {
      "_id": "<dataset_id>",
      "created": "2024-10-04T05:21:16.992Z",
      "created_by_id": "<user_id>",
      "display_name": "demo-collection",
      "metadata": {
        "total_versions": 0,
        "datapoints_count": 0
      },
      "parent_id": null,
      "project_id": "<project_id>",
      "updated": "2024-10-04T05:21:16.992Z",
      "updated_by_id": "<user_id>",
      "version": null,
      "workspace_id": "<workspace_id>"
    }
 ]
}

Fetching a Dataset

Fetch a Dataset using the Retrieve a Dataset API.

Replace DATASET_ID below with a previously acquired dataset ID.

# Fetch a single Dataset by its ID.
# Replace DATASET_ID with the ID returned by the Create Dataset call,
# and ORQ_API_KEY with your workspace API key.
curl --request GET \
     --url https://api.orq.ai/v2/datasets/DATASET_ID \
     --header 'accept: application/json' \
     --header 'authorization: Bearer ORQ_API_KEY'
from orq_ai_sdk import Orq
import os


# Fetch a single Dataset by its ID.
with Orq(
    api_key=os.getenv("ORQ_API_KEY", ""),
) as orq:

    dataset_id = "<id>"
    res = orq.datasets.retrieve(dataset_id=dataset_id)

    assert res is not None

    # Handle response
    print(res)
import { Orq } from "@orq-ai/node";

const orq = new Orq({
  apiKey: process.env["ORQ_API_KEY"] ?? "",
});

const run = async () => {
  // Look up a single Dataset by its identifier.
  const result = await orq.datasets.retrieve({ datasetId: "<id>" });
  console.log(result);
};

run();

The following response is sent by the API.

{
  "_id": "<dataset_id>",
  "display_name": "MyDataset",
  "path": "Default",
  "workspace_id": "<workspace_id>",
  "created": "2025-06-05T13:16:24.865Z",
  "updated": "2025-06-05T13:16:24.865Z",
  "created_by_id": null,
  "updated_by_id": null,
  "project_id": "<project_id>",
  "metadata": {
    "total_versions": 0,
    "datapoints_count": 4
  }
}


👍

Once a Dataset is created and populated with Datapoints, it can be used in an Experiment; to learn more, see Creating an Experiment.