> ## Documentation Index
> Fetch the complete documentation index at: https://docs.orq.ai/llms.txt
> Use this file to discover all available pages before exploring further.

> Create a new datasource within a knowledge base. Upload files or connect external sources to populate your RAG knowledge base with content.

# Create a new datasource



## OpenAPI

````yaml post /v2/knowledge/{knowledge_id}/datasources
# Document header: specification version, API metadata, base URL and default auth.
openapi: 3.1.0
info:
  title: orq.ai API
  description: orq.ai API documentation
  # Quoted so the version stays a string instead of being read as the float 2.0.
  version: '2.0'
servers:
  # Base URL that the operation paths below are resolved against.
  - url: https://api.orq.ai
# Document-wide default: operations use the ApiKey scheme unless they override `security`.
security:
  - ApiKey: []
# Tag declarations; operations reference these names in their own `tags` list.
tags:
  - name: Guardrail Rules
  - name: Policies
  - name: Routing Rules
  - name: Files
    description: File upload and retrieval operations.
  - name: FilesService
  - name: Projects
    description: Projects organize resources within a workspace
  - name: ProjectsService
  - name: Skills
    description: >-
      Skills are modular instructions you can use to codify processes and
      conventions
  - name: SkillsService
  - name: Responses
  - name: Agent Schedules
    description: >-
      Run agents on a cadence — cron, interval, or one-off. Minimum firing
      interval is 1 hour.
  - name: Reporting
    description: >-
      GenAI reporting API over canonical analytics rollups. Accepts a metric
      name, time range, grain, group-by, and filters; returns a typed time
      series and optional totals.
  - name: ReportingService
    # NOTE(review): the continuation lines below carry one extra leading space,
    # which the literal block scalar preserves in the rendered text. Looks like
    # an extraction artifact — confirm before normalizing.
    description: |-
      ReportingService exposes a single QueryReport RPC that maps allowlisted
       analytics payloads onto safe rollup queries. Callers never send SQL;
       the backend picks the rollup family and grain from the metric
       catalogue, the requested range, and the requested grouping.
  # Declared because the CreateDatasource operation in this document is tagged
  # with it; without a declaration the tag is used but never defined.
  - name: Knowledge Bases
# Link to the human-readable documentation site for this API.
externalDocs:
  description: orq.ai Documentation
  url: https://docs.orq.ai
paths:
  /v2/knowledge/{knowledge_id}/datasources:
    post:
      tags:
        - Knowledge Bases
      summary: Create a new datasource
      operationId: CreateDatasource
      parameters:
        # Path parameter filling the `{knowledge_id}` segment of the route.
        - name: knowledge_id
          in: path
          required: true
          description: The unique identifier of the knowledge base
          schema:
            type: string
            description: The unique identifier of the knowledge base
      # JSON payload describing the datasource to create. The body itself is
      # required, but the object schema declares no `required` list, so every
      # property below is optional as far as the schema is concerned.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              type: object
              properties:
                display_name:
                  type: string
                  description: >-
                    The display name for the datasource visible in the UI. If
                    omitted, the display name is derived from the uploaded file.
                    When both `display_name` and `file_id` are provided, the
                    provided `display_name` is prioritized.
                file_id:
                  type: string
                  description: >-
                    The unique identifier of the file used for datasource
                    creation. If provided, the file is immediately queued for
                    chunking.
                chunking_options:
                  type: object
                  properties:
                    chunking_configuration:
                      # Exactly one of two variants, told apart by the
                      # single-value `type` enum: `default` or `advanced`.
                      oneOf:
                        - type: object
                          properties:
                            type:
                              type: string
                              enum:
                                - default
                          required:
                            - type
                          description: >-
                            Optimized chunking strategy focusing on speed and
                            avoiding duplication of content chunks.
                        - type: object
                          properties:
                            type:
                              type: string
                              enum:
                                - advanced
                            # NOTE(review): described as a character count but
                            # typed `number`, not `integer` — confirm whether
                            # fractional values are meaningful.
                            chunk_max_characters:
                              type: number
                              default: 500
                              description: >-
                                Defines the absolute maximum character length
                                per chunk. Text elements exceeding this size
                                will be automatically split into multiple
                                chunks.
                            # NOTE(review): same `number` vs `integer` question
                            # as `chunk_max_characters`.
                            chunk_overlap:
                              type: number
                              default: 0
                              description: >-
                                Specifies the number of characters to overlap
                                between consecutive chunks. This overlap helps
                                maintain semantic continuity when splitting
                                large text elements.
                          required:
                            - type
                          description: >-
                            Provides advanced settings for customizing chunking
                            behavior, enabling fine-grained control to better
                            meet specific data processing needs.
                      description: >-
                        The chunking configuration settings for the datasource.
                        Defaults to the system's standard chunking configuration
                        if not specified.
                    # Boolean switches for text cleanup. None of them declares a
                    # schema-level `default`; the description at the end of this
                    # object states that all options are enabled by default.
                    chunking_cleanup_options:
                      type: object
                      properties:
                        delete_emails:
                          type: boolean
                          description: Removes email addresses from the provided text.
                        delete_credit_cards:
                          type: boolean
                          description: Removes credit card numbers from the provided text.
                        delete_phone_numbers:
                          type: boolean
                          description: Removes phone numbers from the provided text.
                        clean_bullet_points:
                          type: boolean
                          description: Removes bullet points formatting from the text.
                        clean_numbered_list:
                          type: boolean
                          description: Removes numbered list formatting from the text.
                        clean_unicode:
                          type: boolean
                          description: >-
                            Normalizes or removes unnecessary unicode characters
                            from the text.
                        clean_dashes:
                          type: boolean
                          description: >-
                            Normalizes or removes various dash characters to
                            standardize the text.
                        clean_whitespaces:
                          type: boolean
                          description: >-
                            Trims and normalizes excessive whitespace throughout
                            the text.
                      description: >-
                        The cleanup options applied to the datasource content.
                        All options are enabled by default to ensure enhanced
                        security and optimal chunk quality. Defaults to
                        system-standard cleanup options if not specified.
                  # The schema does not mark `chunking_options` as required and
                  # both nested objects document defaults, so the text states
                  # when the options apply rather than calling them required.
                  description: >-
                    Configuration options specifying how the datasource file is
                    chunked. Applies when `file_id` is specified. Defaults to
                    standard chunking options if omitted.
      # Declared outcomes: a 200 carrying the created datasource, and a 500
      # with no body schema. No other status codes are documented here.
      responses:
        '200':
          description: Datasource successfully created
          content:
            application/json:
              schema:
                type: object
                properties:
                  _id:
                    type: string
                    format: ulid
                    # Crockford base32 alphabet (no I, L, O, U), 26 characters.
                    pattern: ^[0-9A-HJKMNP-TV-Z]{26}$
                    # NOTE(review): a fixed `default` on a read-only identifier
                    # looks like a generator artifact — confirm whether an
                    # `example` was intended.
                    default: 01KRM5REJP7MCRDWZM3Y518DV7
                    readOnly: true
                    description: The unique identifier of the data source
                  display_name:
                    type: string
                    description: >-
                      The display name of the datasource. Normally the name of
                      the uploaded file
                  description:
                    type: string
                    description: The description of the datasource
                  # Status values the API may report; the schema declares no
                  # description for this field.
                  status:
                    type: string
                    enum:
                      - pending
                      - processing
                      - completed
                      - failed
                      - queued
                  file_id:
                    type:
                      - string
                      - 'null'
                    description: >-
                      The unique identifier of the file used to create the
                      datasource.
                  created:
                    type: string
                    description: The date and time the datasource was created
                  updated:
                    type: string
                    description: The date and time the datasource was updated
                  created_by_id:
                    type:
                      - string
                      - 'null'
                    format: uuid
                    pattern: >-
                      ^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$
                    readOnly: true
                    description: The user ID of the creator of the datasource
                  # NOTE(review): spelled `update_by_id`, not `updated_by_id`.
                  # Left unchanged because the property name is part of the
                  # response payload — confirm against the server.
                  update_by_id:
                    type:
                      - string
                      - 'null'
                    format: uuid
                    pattern: >-
                      ^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$
                    readOnly: true
                    description: >-
                      The user ID of the last user who updated the datasource
                  knowledge_id:
                    type: string
                    description: The unique identifier of the knowledge base
                  chunks_count:
                    type: number
                    description: The number of chunks in the datasource
                # `_id`, `description`, `file_id`, `created_by_id` and
                # `update_by_id` are not listed, so the schema allows them to
                # be absent from the response.
                required:
                  - display_name
                  - status
                  - created
                  - updated
                  - knowledge_id
                  - chunks_count
        '500':
          description: Failed to create datasource
components:
  securitySchemes:
    # HTTP bearer-token scheme referenced by the top-level `security` entry.
    # `bearerFormat` is a documentation hint naming the token format (JWT).
    ApiKey:
      type: http
      scheme: bearer
      bearerFormat: JWT

````