Noveum · mramanindia · Oct 23, 2025 · Oct 23, 2025 · Oct 23, 2025 · Oct 23, 2025
diff --git a/noveum_customer_support_bt/README_workflow.md b/noveum_customer_support_bt/README_workflow.md
@@ -0,0 +1,81 @@
+# Noveum Platform Score Upload Workflow
+
+## Prerequisites
+- Virtual environment activated
+- .env file configured with API credentials
+- Processed dataset available
+- Evaluation results CSV available
+
+## Step-by-Step Commands
+
+### Step 1: Setup Environment
+```bash
+cd /path/to/NovaEval  # replace with the path to your local NovaEval checkout
+source .venv/bin/activate
+cd noveum_customer_support_bt
+```
+
+### Step 2: Create Dataset
+```bash
+python create_dataset.py --dataset-type agent --description "Customer Support Agent Evaluation Dataset" --pretty
+```
+**Note**: After this step, compare the slug returned by the API with `NOVEUM_DATASET_SLUG` in your .env file and update the .env value if they differ.
+
+### Step 3: Create Dataset Version
+```bash
+python create_dataset_version.py --pretty
+```
+
+### Step 4: Upload Dataset Items
+```bash
+python upload_dataset.py --dataset-json processed_datasets/agent.rag_evaluation_metrics_dataset_processed_dataset.json --item-type conversation
+```
+
+### Step 5: Publish Dataset Version
+```bash
+python publish_dataset_version.py --pretty
+```
+
+### Step 6: Upload Evaluation Scores
+
+#### Option A: Upload All Scores Separately
+```bash
+# Task Progression Scores
+python upload_scores.py demo_results/agent.rag_evaluation_metrics_dataset/agent_evaluation_results.csv --item-key-col turn_id --score-col task_progression --reasoning-col task_progression_reasoning --scorer-id task_progression_scorer --scorer-version 1.0.0
+
+# Context Relevancy Scores
+python upload_scores.py demo_results/agent.rag_evaluation_metrics_dataset/agent_evaluation_results.csv --item-key-col turn_id --score-col context_relevancy --reasoning-col context_relevancy_reasoning --scorer-id context_relevancy_scorer --scorer-version 1.0.0
+
+# Role Adherence Scores
+python upload_scores.py demo_results/agent.rag_evaluation_metrics_dataset/agent_evaluation_results.csv --item-key-col turn_id --score-col role_adherence --reasoning-col role_adherence_reasoning --scorer-id role_adherence_scorer --scorer-version 1.0.0
+
+# Tool Relevancy Scores
+python upload_scores.py demo_results/agent.rag_evaluation_metrics_dataset/agent_evaluation_results.csv --item-key-col turn_id --score-col tool_relevancy --reasoning-col tool_relevancy_reasoning --scorer-id tool_relevancy_scorer --scorer-version 1.0.0
+
+# Parameter Correctness Scores
+python upload_scores.py demo_results/agent.rag_evaluation_metrics_dataset/agent_evaluation_results.csv --item-key-col turn_id --score-col parameter_correctness --reasoning-col parameter_correctness_reasoning --scorer-id parameter_correctness_scorer --scorer-version 1.0.0
+```
+
+#### Option B: Test with Dry Run First
+Add the `--dry-run` flag to any upload command to test it without actually uploading:
+```bash
+python upload_scores.py demo_results/agent.rag_evaluation_metrics_dataset/agent_evaluation_results.csv --item-key-col turn_id --score-col task_progression --reasoning-col task_progression_reasoning --scorer-id task_progression_scorer --scorer-version 1.0.0 --dry-run
+```
+
+## Environment Variables Required
+Make sure your .env file contains:
+- NOVEUM_API_KEY
+- NOVEUM_ORG_SLUG
+- NOVEUM_DATASET_SLUG
+- NOVEUM_DATASET_NAME
+- LATEST_VERSION
+- NOVEUM_PROJECT
+- NOVEUM_ENVIRONMENT
+- BETA (true/false)
+
+## Troubleshooting
+- If dataset creation fails, check whether the dataset already exists
+- If an upload fails, verify that the JSON format matches the expected schema
+- Use the `--pretty` flag for pretty-printed JSON output
+- Check the API responses for specific error messages
+
diff --git a/noveum_customer_support_bt/api_data.json b/noveum_customer_support_bt/api_data.json
@@ -0,0 +1,84 @@
+{
+  "items": [
+    {
+      "item_key": "eda4fe22-9a2b-4b73-856b-f4f3309bf719",
+      "item_id": "item_1"
+    },
+    {
+      "item_key": "0ffffba1-8a37-443c-8866-d53ffbfa7718",
+      "item_id": "item_2"
+    },
+    {
+      "item_key": "f1f37bd7-0851-4659-b493-b80d3800d920",
+      "item_id": "item_3"
+    },
+    {
+      "item_key": "43cdf081-4f01-49cd-b566-dbd1619e6cd2",
+      "item_id": "item_4"
+    },
+    {
+      "item_key": "9a1983f4-09da-4b53-80e6-38de6878e0e7",
+      "item_id": "item_5"
+    },
+    {
+      "item_key": "5d2517e1-220a-429d-9d59-f701bda25eed",
+      "item_id": "item_6"
+    },
+    {
+      "item_key": "52aacb67-c361-4445-9b72-c157f79f47d6",
+      "item_id": "item_7"
+    },
+    {
+      "item_key": "a81ca3a8-80aa-4c39-876e-8d40ea7a0aef",
+      "item_id": "item_8"
+    },
+    {
+      "item_key": "230aad27-f3dd-4968-a45a-3c2f07ac28ed",
+      "item_id": "item_9"
+    },
+    {
+      "item_key": "83c7dcce-3d89-4da1-8b3f-d419885d4cbc",
+      "item_id": "item_10"
+    },
+    {
+      "item_key": "2218f641-604c-491a-9710-b51a9941b982",
+      "item_id": "item_11"
+    },
+    {
+      "item_key": "255fd49c-84b4-4b18-887e-6308a412d535",
+      "item_id": "item_12"
+    },
+    {
+      "item_key": "dc511122-c0b6-415c-9a49-c7b45132dd87",
+      "item_id": "item_13"
+    },
+    {
+      "item_key": "04bebf38-a343-4563-80db-0154bef8d927",
+      "item_id": "item_14"
+    },
+    {
+      "item_key": "5e043630-6493-42b5-beb8-79faa19bfa37",
+      "item_id": "item_15"
+    },
+    {
+      "item_key": "7da9814d-a2e8-4c4e-b750-68b26bd5fd22",
+      "item_id": "item_16"
+    },
+    {
+      "item_key": "16143f74-2831-4753-b33d-ce4b645093c5",
+      "item_id": "item_17"
+    },
+    {
+      "item_key": "fc64e6cc-6739-4256-ac4a-7b80c3028233",
+      "item_id": "item_18"
+    },
+    {
+      "item_key": "b7945c49-f584-4c70-972d-536a805d8a31",
+      "item_id": "item_19"
+    },
+    {
+      "item_key": "f5c40ecf-36c0-45ba-9cc9-dc0329b0324b",
+      "item_id": "item_20"
+    }
+  ]
+}
diff --git a/noveum_customer_support_bt/create_dataset.py b/noveum_customer_support_bt/create_dataset.py
@@ -0,0 +1,161 @@
+#!/usr/bin/env python3
+"""
+Script to create a new dataset in Noveum API.
+Creates a dataset with the specified name and type.
+"""
+
+import os
+import json
+import requests
+import argparse
+from dotenv import load_dotenv
+from typing import Dict, Any, Optional
+
+# Load variables from a local .env file (if present) before reading the settings below
+load_dotenv()
+
+# Noveum API settings; any variable that is unset stays None and is reported by validate_environment()
+api_key = os.getenv('NOVEUM_API_KEY')
+org_slug = os.getenv('NOVEUM_ORG_SLUG')
+dataset_slug = os.getenv('NOVEUM_DATASET_SLUG')
+dataset_name = os.getenv('NOVEUM_DATASET_NAME')
+beta_env = os.getenv('BETA', 'false').lower() == 'true'
+
+def validate_environment():
+    """Return True if all required settings are non-empty; otherwise print them and return False."""
+    settings = {
+        'NOVEUM_API_KEY': api_key,
+        'NOVEUM_ORG_SLUG': org_slug,
+        'NOVEUM_DATASET_SLUG': dataset_slug,
+        'NOVEUM_DATASET_NAME': dataset_name
+    }
+
+    # Unset and empty values are both treated as missing.
+    absent = [name for name in settings if not settings[name]]
+    if not absent:
+        return True
+
+    print(f"Error: Missing required environment variables: {', '.join(absent)}")
+    print("Please set these variables in your .env file or environment")
+    return False
+
+def create_dataset(dataset_type: str, description: str = "", visibility: str = "org", environment: str = "") -> Optional[Dict[str, Any]]:
+    """Create a dataset through the Noveum API.
+
+    Posts the module-level name and slug with the given arguments and returns the decoded
+    JSON response, or None (after printing diagnostics) if the request or JSON decoding fails.
+    """
+    # With BETA enabled the endpoint has no organization segment.
+    if beta_env:
+        api_url = "https://noveum.ai/api/v1/datasets"
+    else:
+        api_url = f"https://noveum.ai/api/v1/organizations/{org_slug}/datasets"
+
+    # The API key is sent both as a bearer token and as a cookie.
+    headers = {
+        'Content-Type': 'application/json',
+        'Authorization': f'Bearer {api_key}',
+        'Cookie': f'apiKeyCookie={api_key}'
+    }
+
+    request_data = {
+        "name": dataset_name,
+        "slug": dataset_slug,  # requested slug; main() reports the slug the API returns
+        "description": description,
+        "visibility": visibility,
+        "dataset_type": dataset_type,
+        "environment": environment
+    }
+
+    print(f"Creating dataset at: {api_url}")
+    print(f"Organization: {org_slug}")
+    print(f"Dataset name: {dataset_name}")
+    print(f"Dataset type: {dataset_type}")
+    print(f"Description: {description}")
+    print(f"Visibility: {visibility}")
+    print(f"Environment: {environment}")
+
+    try:
+        response = requests.post(api_url, headers=headers, json=request_data, timeout=30)
+        response.raise_for_status()
+        data = response.json()
+    except (requests.exceptions.RequestException, ValueError) as e:
+        # ValueError: non-JSON body on requests versions whose JSON error is not a RequestException.
+        print(f"Error creating dataset: {e}")
+        if getattr(e, 'response', None) is not None:
+            print(f"Response status: {e.response.status_code}")
+            print(f"Response text: {e.response.text}")
+        return None
+    print("Successfully created dataset")
+    print(f"Response status: {response.status_code}")
+    return data
+
+def main():
+    """Parse CLI options, create the dataset, then save and print the API response.
+
+    Returns 0 on success, or 1 when required environment variables are missing,
+    the API call fails, or the response cannot be written to the output file.
+    """
+    # Define valid dataset types
+    valid_dataset_types = ['agent', 'conversational', 'g-eval', 'custom']
+
+    parser = argparse.ArgumentParser(description='Create a new dataset in Noveum API')
+    parser.add_argument('--dataset-type', type=str, default='agent',
+                       choices=valid_dataset_types,
+                       help=f'Type of the dataset. Must be one of: {", ".join(valid_dataset_types)} (default: agent)')
+    parser.add_argument('--description', type=str, default="",
+                       help='Description of the dataset (default: empty string)')
+    parser.add_argument('--visibility', type=str, default="org",
+                       help='Visibility of the dataset (default: org)')
+    parser.add_argument('--environment', type=str, default="",
+                       help='Environment for the dataset (default: empty string)')
+    parser.add_argument('--pretty', action='store_true',
+                       help='Pretty print the JSON response')
+    parser.add_argument('--output', type=str, default="dataset_creation_response.json",
+                       help='Output file to save the JSON response (default: dataset_creation_response.json)')
+
+    args = parser.parse_args()
+
+    # Print warning about dataset slug
+    print("⚠️  WARNING: Please update the dataset slug in your .env file after creating the dataset!")
+    print("   The API will return a slug that you should set as NOVEUM_DATASET_SLUG in your .env file.")
+    print()
+
+    # Validate environment variables
+    if not validate_environment():
+        return 1
+
+    # Create dataset
+    data = create_dataset(dataset_type=args.dataset_type, description=args.description,
+                          visibility=args.visibility, environment=args.environment)
+
+    if data is None:
+        return 1
+
+    # Save response to file
+    try:
+        with open(args.output, 'w', encoding='utf-8') as f:
+            json.dump(data, f, indent=2)
+        print(f"\nResponse saved to: {args.output}")
+    except OSError as e:
+        print(f"Error saving response to file: {e}")
+        return 1
+
+    # Print the response
+    if args.pretty:
+        print("\nResponse data:")
+        print(json.dumps(data, indent=2))
+    else:
+        print(f"\nResponse data: {json.dumps(data)}")
+
+    # The slug may sit at the top level or under a nested "data" object; report the first one found.
+    for holder in (data, data.get('data') if isinstance(data, dict) else None):
+        if isinstance(holder, dict) and 'slug' in holder:
+            print("\n📝 IMPORTANT: Update your .env file with:")
+            print(f"   NOVEUM_DATASET_SLUG={holder['slug']}")
+            break
+
+    return 0
+
+if __name__ == "__main__":
+    raise SystemExit(main())  # use main()'s return value as the exit status without the site-provided exit()