Cohere - Prompt Management

This article guides you through integrating your SaaS with orq.ai and Cohere using our Python and Node.js SDKs. By the end of the article, you'll know how to set up a Deployment in orq.ai, perform prompt engineering, request a Deployment variant using our SDK code generator, send a payload to Cohere, and add additional information to the request.

This guide shows you how to integrate your products with Cohere using the orq.ai Python SDK. For the longer blog article, see: Integrate orq.ai with Cohere using Python SDK

Step 1: Install the SDK

# orquesta sdk
pip install orquesta-sdk

# cohere
pip install cohere

// orquesta sdk
npm install @orquesta/node --save
// or with yarn
yarn add @orquesta/node

// cohere 
npm i -s cohere-ai

Step 2: Enable models in the Model Garden

Orq.ai allows you to pick and enable the models of your choice and work with them. Enabling a model is straightforward: navigate to the Model Garden and toggle on the model you want to use.

Step 3: Execute prompt

You can find your orq.ai API Key in your workspace: https://my.orquesta.dev/<workspace-name>/settings/developers
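Tip: rather than hardcoding the key in your source, you can read it from an environment variable. A minimal sketch, assuming you have exported the key as ORQUESTA_API_KEY (the variable name is our convention, not mandated by the SDK):

import os

# Assumes the key was exported beforehand, e.g. `export ORQUESTA_API_KEY=...`
api_key = os.environ.get("ORQUESTA_API_KEY")
if not api_key:
    raise RuntimeError("ORQUESTA_API_KEY environment variable is not set")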

import cohere

from orquesta_sdk import Orquesta, OrquestaClientOptions

api_key = "ORQUESTA_API_KEY"  # Replace with your orq.ai API key

# Initialize the Orquesta Client
options = OrquestaClientOptions(
    api_key=api_key,
    environment="production",
)

client = Orquesta(options)

# Get deployment configuration
config = client.deployments.get_config(
    key="Deployment-with-Cohere",
    context={"environments": ["production"], "locale": ["en"]},
    metadata={"custom-field-name": "custom-metadata-value"},
)

deployment_config = config.to_dict()

# Convert the array of messages to a single string
prompt = "\n".join(message['content'] for message in deployment_config['messages'])

# Initialize the Cohere client
co = cohere.Client('COHERE_API_KEY') # Insert your Cohere API key
completion = co.generate(
    model=deployment_config['model'],
    prompt=prompt,
    temperature=deployment_config['parameters']['temperature'],
    max_tokens=deployment_config['parameters']['maxTokens'],
)

print(completion)
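Printing the completion object outputs the full response, including metadata. To get just the generated text, you can typically index into the first generation (a sketch, assuming the pre-v5 cohere Python client used above, where co.generate returns a Generations object):

# A call can return multiple generations; take the text of the first one
print(completion.generations[0].text)

Below is the same flow using the Node.js SDK.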
import { CohereClient } from 'cohere-ai';

const start = async () => {
  const { createClient } = await import('@orquesta/node');

  // Initialize Orquesta client
  const client = createClient({
    apiKey: 'ORQUESTA_API_KEY',
    environment: 'production',
  });

  // Getting the deployment config
  const deploymentConfig = await client.deployments.getConfig({
    key: 'Deployment-with-Cohere',
    context: {
      environments: ['production'],
      locale: ['en'],
    },
    metadata: {
      'custom-field-name': 'custom-metadata-value',
    },
  });
  console.log(deploymentConfig);

  const deploymentConfigObj: any = deploymentConfig;

  // Convert the array of messages to a single string
  const prompt: string = deploymentConfigObj.messages
    .map((message: any) => message.content)
    .join('\n');

  // Send the request to Cohere
  const cohereApiKey: string = 'COHERE_API_KEY';

  const cohere = new CohereClient({
    token: cohereApiKey,
  });

  // Generate completion using Cohere
  const prediction = await cohere.generate({
    prompt: prompt,
    model: deploymentConfigObj.model,
    maxTokens: deploymentConfigObj.parameters.maxTokens,
    temperature: deploymentConfigObj.parameters.temperature,
  });

  console.log('Received prediction', prediction);
};

// Call the async function
start();

Step 4: Report analytics back to orq.ai

Add metrics to each transaction using the add_metrics method (addMetrics in Node). Below is an example with the full set of metrics that can be reported.

config.add_metrics(
  chain_id="c4a75b53-62fa-401b-8e97-493f3d299316",
  conversation_id="ee7b0c8c-eeb2-43cf-83e9-a4a49f8f13ea",
  user_id="e3a202a6-461b-447c-abe2-018ba4d04cd0",
  feedback={"score": 100},
  metadata={
      "custom": "custom_metadata",
      "chain_id": "ad1231xsdaABw",
  },
  usage={
      "prompt_tokens": 100,
      "completion_tokens": 900,
      "total_tokens": 1000,
  },
  performance={
      "latency": 9000,
      "time_to_first_token": 250,
  }
)

deploymentConfig.addMetrics({
  chain_id: "c4a75b53-62fa-401b-8e97-493f3d299316",
  conversation_id: "ee7b0c8c-eeb2-43cf-83e9-a4a49f8f13ea",
  user_id: "e3a202a6-461b-447c-abe2-018ba4d04cd0",
  feedback: {
    score: 100
  },
  metadata: {
    custom: "custom_metadata",
    chain_id: "ad1231xsdaABw"
  },
  usage: {
    prompt_tokens: 100,
    completion_tokens: 900,
    total_tokens: 1000
  },
  performance: {
    latency: 9000,
    time_to_first_token: 250
  }
})
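For instance, to report a measured latency instead of a hard-coded value, you could time the Cohere call from Step 3 and pass the result to add_metrics. A minimal Python sketch building on the config, co, prompt, and deployment_config variables above (we assume latency is expressed in milliseconds, matching the example values):

import time

start_time = time.perf_counter()
completion = co.generate(
    model=deployment_config["model"],
    prompt=prompt,
    temperature=deployment_config["parameters"]["temperature"],
    max_tokens=deployment_config["parameters"]["maxTokens"],
)
latency_ms = int((time.perf_counter() - start_time) * 1000)

# All metric fields are optional; report only what you actually measured
config.add_metrics(
    performance={"latency": latency_ms},
)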