Skip to content
Open
13 changes: 13 additions & 0 deletions crates/adaptive/tests/unit/acg/telemetry_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,7 @@ fn test_anthropic_cache_telemetry_event_reconstructs_total_prompt_tokens() {
total_tokens: None,
cache_read_tokens: Some(500),
cache_write_tokens: Some(200),
cost: None,
};

let event = CacheTelemetryEvent::from_usage(
Expand Down Expand Up @@ -196,6 +197,7 @@ fn test_anthropic_cache_telemetry_event_maps_write_only_zero_read_to_cold_start(
total_tokens: None,
cache_read_tokens: Some(0),
cache_write_tokens: Some(700),
cost: None,
};

let event = CacheTelemetryEvent::from_usage(
Expand All @@ -221,6 +223,7 @@ fn test_anthropic_cache_telemetry_event_returns_none_without_prompt_tokens() {
total_tokens: None,
cache_read_tokens: Some(500),
cache_write_tokens: Some(200),
cost: None,
};

let event = CacheTelemetryEvent::from_usage(
Expand All @@ -243,6 +246,7 @@ fn test_openai_cache_telemetry_event_normalizes_creation_tokens_to_zero() {
total_tokens: None,
cache_read_tokens: Some(600),
cache_write_tokens: Some(999),
cost: None,
};

let event = CacheTelemetryEvent::from_usage(
Expand Down Expand Up @@ -270,6 +274,7 @@ fn test_openai_cache_telemetry_event_maps_zero_read_to_unknown() {
total_tokens: None,
cache_read_tokens: Some(0),
cache_write_tokens: Some(999),
cost: None,
};

let event = CacheTelemetryEvent::from_usage(
Expand Down Expand Up @@ -303,6 +308,7 @@ fn telemetry_observability_keeps_request_facts_optional_for_anthropic_unknown_mi
total_tokens: None,
cache_read_tokens: Some(0),
cache_write_tokens: Some(0),
cost: None,
};

let event = CacheTelemetryEvent::from_usage(
Expand Down Expand Up @@ -340,6 +346,7 @@ fn test_from_usage_uses_prefix_mismatch_diagnosis_when_request_facts_are_availab
total_tokens: None,
cache_read_tokens: Some(0),
cache_write_tokens: Some(0),
cost: None,
};
let request_facts = CacheRequestFacts {
provider: "openai".to_string(),
Expand Down Expand Up @@ -408,6 +415,7 @@ fn test_cache_miss_diagnosis_prefix_mismatch_is_bounded_and_serialized() {
total_tokens: None,
cache_read_tokens: Some(0),
cache_write_tokens: Some(0),
cost: None,
},
sample_timestamp(),
Some(&request_facts),
Expand Down Expand Up @@ -475,6 +483,7 @@ fn test_cache_miss_diagnosis_below_minimum_threshold_reports_exact_token_counts(
total_tokens: None,
cache_read_tokens: Some(0),
cache_write_tokens: Some(0),
cost: None,
},
sample_timestamp(),
Some(&request_facts),
Expand Down Expand Up @@ -529,6 +538,7 @@ fn test_cache_miss_diagnosis_retention_expired_reports_gap_and_window() {
total_tokens: None,
cache_read_tokens: Some(0),
cache_write_tokens: Some(0),
cost: None,
},
sample_timestamp(),
Some(&request_facts),
Expand Down Expand Up @@ -584,6 +594,7 @@ fn test_cache_miss_diagnosis_unknown_preserves_missing_facts() {
total_tokens: None,
cache_read_tokens: Some(0),
cache_write_tokens: Some(0),
cost: None,
},
sample_timestamp(),
Some(&request_facts),
Expand Down Expand Up @@ -636,6 +647,7 @@ fn test_no_write_anthropic_cache_miss_diagnosis_uses_threshold_facts_without_loc
total_tokens: None,
cache_read_tokens: Some(0),
cache_write_tokens: Some(0),
cost: None,
},
sample_timestamp(),
Some(&request_facts),
Expand Down Expand Up @@ -676,6 +688,7 @@ fn test_anthropic_multi_breakpoint_telemetry_event_uses_normalized_usage_totals(
total_tokens: None,
cache_read_tokens: Some(900),
cache_write_tokens: Some(600),
cost: None,
},
sample_timestamp(),
Some(&request_facts),
Expand Down
1 change: 1 addition & 0 deletions crates/adaptive/tests/unit/drain_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -870,6 +870,7 @@ fn test_accumulator_extracts_annotated_response() {
total_tokens: Some(150),
cache_read_tokens: None,
cache_write_tokens: None,
cost: None,
}),
api_specific: None,
extra: serde_json::Map::new(),
Expand Down
125 changes: 124 additions & 1 deletion crates/cli/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ pub(crate) enum Command {
Config(ConfigCommand),
/// Create or edit plugin configuration (writes `plugins.toml`)
Plugins(PluginsCommand),
/// Validate and configure model pricing catalogs.
Pricing(PricingCommand),
/// Diagnose env, agents, config, observability (optionally scoped to one agent)
Doctor(DoctorCommand),
/// List supported and locally-detected agents (use `--json` for machine output)
Expand Down Expand Up @@ -162,6 +164,93 @@ pub(crate) enum PluginsSubcommand {
Edit(PluginsEditCommand),
}

/// Args for `nemo-relay pricing`.
#[derive(Debug, Clone, Args)]
pub(crate) struct PricingCommand {
#[command(subcommand)]
pub(crate) command: PricingSubcommand,
}

/// Pricing catalog and resolver subcommands.
#[derive(Debug, Clone, Subcommand)]
pub(crate) enum PricingSubcommand {
/// Validate a pricing catalog JSON file.
Validate(PricingValidateCommand),
/// Initialize the pricing plugin component in `plugins.toml`.
Init(PricingInitCommand),
/// Add a pricing catalog file source to `plugins.toml`.
AddSource(PricingAddSourceCommand),
/// Resolve which pricing entry matches a model and optional usage.
Resolve(PricingResolveCommand),
}

/// Common target-scope flags for pricing config mutations.
#[derive(Debug, Clone, Default, Args)]
#[command(group(
ArgGroup::new("pricing_scope")
.args(["user", "project", "global"])
.multiple(false)
))]
pub(crate) struct PricingScopeArgs {
/// Edit the user config at `$XDG_CONFIG_HOME/nemo-relay/plugins.toml`.
#[arg(long)]
pub(crate) user: bool,
/// Edit the nearest project config at `.nemo-relay/plugins.toml`.
#[arg(long)]
pub(crate) project: bool,
/// Edit the system config at `/etc/nemo-relay/plugins.toml`.
#[arg(long)]
pub(crate) global: bool,
}

/// Args for `nemo-relay pricing validate`.
#[derive(Debug, Clone, Args)]
pub(crate) struct PricingValidateCommand {
/// Path to a Relay pricing catalog JSON file.
pub(crate) path: PathBuf,
}

/// Args for `nemo-relay pricing init`.
#[derive(Debug, Clone, Args)]
pub(crate) struct PricingInitCommand {
#[command(flatten)]
pub(crate) scope: PricingScopeArgs,
}

/// Args for `nemo-relay pricing add-source`.
#[derive(Debug, Clone, Args)]
pub(crate) struct PricingAddSourceCommand {
#[command(flatten)]
pub(crate) scope: PricingScopeArgs,
/// Path to a Relay pricing catalog JSON file.
pub(crate) path: PathBuf,
/// Append as a lower-priority source instead of prepending as the highest-priority override.
#[arg(long)]
pub(crate) append: bool,
}

/// Args for `nemo-relay pricing resolve`.
#[derive(Debug, Clone, Args)]
pub(crate) struct PricingResolveCommand {
/// Model ID or routed model name to look up.
pub(crate) model: String,
/// Optional provider or route, such as `openai`, `anthropic`, or `azure/openai`.
#[arg(long)]
pub(crate) provider: Option<String>,
/// Prompt/input token count to use for an estimate.
#[arg(long)]
pub(crate) prompt_tokens: Option<u64>,
/// Completion/output token count to use for an estimate.
#[arg(long)]
pub(crate) completion_tokens: Option<u64>,
/// Prompt-cache read token count to use for an estimate.
#[arg(long)]
pub(crate) cache_read_tokens: Option<u64>,
/// Prompt-cache write token count to use for an estimate.
#[arg(long)]
pub(crate) cache_write_tokens: Option<u64>,
}

/// Args for `nemo-relay plugins edit`.
#[derive(Debug, Clone, Default, Args)]
#[command(group(
Expand Down Expand Up @@ -898,13 +987,47 @@ fn merge_plugin_components(left: &mut toml::Value, right: toml::Value) {
.iter_mut()
.find(|candidate| component_kind(candidate) == Some(kind.as_str()))
{
merge_toml(existing, component);
if kind == "pricing" {
merge_pricing_component(existing, component);
} else {
merge_toml(existing, component);
}
} else {
left_components.push(component);
}
}
}

fn merge_pricing_component(existing: &mut toml::Value, higher_priority: toml::Value) {
let lower_priority_sources = pricing_component_sources(existing).cloned();
let higher_priority_sources = pricing_component_sources(&higher_priority).cloned();
merge_toml(existing, higher_priority);

let Some(mut sources) = higher_priority_sources else {
return;
};
if let Some(lower_priority_sources) = lower_priority_sources {
sources.extend(lower_priority_sources);
}
set_pricing_component_sources(existing, sources);
}

fn pricing_component_sources(component: &toml::Value) -> Option<&Vec<toml::Value>> {
component
.get("config")
.and_then(|config| config.get("sources"))
.and_then(toml::Value::as_array)
}

fn set_pricing_component_sources(component: &mut toml::Value, sources: Vec<toml::Value>) {
if let Some(config) = component
.get_mut("config")
.and_then(toml::Value::as_table_mut)
{
config.insert("sources".into(), toml::Value::Array(sources));
}
}

fn component_kind(component: &toml::Value) -> Option<&str> {
component
.as_table()
Expand Down
86 changes: 86 additions & 0 deletions crates/cli/src/doctor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use std::time::Duration;

use futures_util::SinkExt;
use nemo_relay::api::event::{BaseEvent, Event, MarkEvent};
use nemo_relay::codec::pricing::{PricingCatalog, PricingConfig, PricingSourceConfig};
use nemo_relay::observability::plugin_component::OBSERVABILITY_PLUGIN_KIND;
use nemo_relay::plugin::{DiagnosticLevel, PluginConfig, validate_plugin_config};
use nemo_relay_adaptive::plugin_component::register_adaptive_component;
Expand All @@ -30,6 +31,7 @@ use crate::config::{
use crate::error::CliError;

const NETWORK_TIMEOUT: Duration = Duration::from_secs(2);
const PRICING_PLUGIN_KIND: &str = "pricing";

/// Outcome of one check inside the doctor report. The `details` field carries human-readable
/// supplementary text; the `status` is the bottom-line signal callers (and CI) use to decide
Expand Down Expand Up @@ -635,6 +637,7 @@ async fn collect_observability(gateway: &GatewayConfig) -> Vec<Check> {
details: "component not configured".into(),
});
}
collect_pricing_component_checks(&mut checks, &plugin_config);

checks
}
Expand Down Expand Up @@ -716,6 +719,89 @@ fn observability_component_config(plugin_value: &Value) -> Option<&Value> {
.and_then(|component| component.get("config"))
}

fn collect_pricing_component_checks(checks: &mut Vec<Check>, plugin_config: &PluginConfig) {
let Some(component) = plugin_config
.components
.iter()
.find(|component| component.kind == PRICING_PLUGIN_KIND)
else {
checks.push(Check {
name: "Pricing",
status: Status::Info,
details: "component not configured".into(),
});
return;
};

if !component.enabled {
checks.push(Check {
name: "Pricing",
status: Status::Info,
details: "component disabled".into(),
});
return;
}

let config =
match serde_json::from_value::<PricingConfig>(Value::Object(component.config.clone())) {
Ok(config) => config,
Err(error) => {
checks.push(Check {
name: "Pricing",
status: Status::Fail,
details: format!("invalid config: {error}"),
});
return;
}
};

if config.sources.is_empty() {
checks.push(Check {
name: "Pricing",
status: Status::Info,
details: "component configured with no sources".into(),
});
return;
}

for (index, source) in config.sources.iter().enumerate() {
checks.push(pricing_source_check(index, source));
}
}

fn pricing_source_check(index: usize, source: &PricingSourceConfig) -> Check {
match source {
PricingSourceConfig::Inline { catalog } => Check {
name: "Pricing source",
status: Status::Pass,
details: format!("inline:{index} valid ({} entries)", catalog.entries.len()),
},
PricingSourceConfig::File { path } => match std::fs::read_to_string(path) {
Ok(raw) => match PricingCatalog::from_json_str(&raw) {
Ok(catalog) => Check {
name: "Pricing source",
status: Status::Pass,
details: format!(
"file:{} valid ({} entries)",
path.display(),
catalog.entries.len()
),
},
Err(error) => Check {
name: "Pricing source",
status: Status::Fail,
details: format!("file:{} invalid catalog: {error}", path.display()),
},
},
Err(error) => Check {
name: "Pricing source",
status: Status::Fail,
details: format!("file:{} unreadable: {error}", path.display()),
},
},
}
}

fn section_enabled(config: &Value, section: &str) -> bool {
config
.get(section)
Expand Down
Loading
Loading