From 539b9b8097a296f1321240da7d816d0893259312 Mon Sep 17 00:00:00 2001 From: yishuiliunian Date: Thu, 25 Jun 2026 18:18:13 +0800 Subject: [PATCH] =?UTF-8?q?fix(routing):=20=E6=A0=B9=E6=B2=BB=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B=E8=B7=AF=E7=94=B1=C3=97provider=E8=A7=A3=E6=9E=90,?= =?UTF-8?q?=E4=BF=AE=E5=A4=8D=20compaction=20=E9=9D=99=E9=BB=98=E5=A4=B1?= =?UTF-8?q?=E6=95=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 纯 GPT 部署下 /compact 与 auto-compact 从不生效:摘要任务被 build_model_router 硬编码默认成 claude-haiku,但无 Anthropic provider, 解析失败被静默成 Ok(false)。生产 session 因此上下文涨到 68万 token 顶 25.6万 窗口、237 turn 0 次成功压缩,切 gpt-5.4/5.5 都无效。 根因是"选模型→找 provider"两步各自决定、provider-盲默认散落、失败 处理不一致。本次从架构上收敛: - Kernel::resolve_task 成为唯一 provider 感知解析咽喉;compaction 与 classifier 都走它(classifier 经 ProviderResolver trait 委托同一实现) - 删 build_model_router 的硬编码摘要默认;未配置任务回落主模型 (ModelRouter::resolve 既有 fallback),省钱靠显式 model_routing.summarization - SharedModelRouter/ModelRouterReader 读写句柄分离:runner 持写、 classifier 持只读,共享单一 Arc,/model 切换被双方实时观测, single-writer 由类型固化(修 classifier stale) - 启动期 preflight 校验 settings.model + model_routing,逐条不可解析报 error/warn(软警告不 bail);provider 缺失时 compaction 发可见 Stream 提示而非终止语义的 Error(避免子 agent 视图被弹回 root) - 卫生:hook prompt 删 claude-haiku 默认(无 model 则 skip); DEFAULT_MODEL 提取为 loopal-config SSOT 测试:新增 resolve_task/is_model_resolvable/preflight/SharedModelRouter 单测 + compaction provider-error e2e + reader 观测 /model 切换 e2e; 全量 94/94 通过、clippy 零警告、所有文件 ≤200 行。 --- crates/loopal-agent-server/src/agent_setup.rs | 8 +- .../src/agent_setup_context.rs | 4 + .../src/agent_setup_helpers.rs | 13 +- crates/loopal-agent-server/src/params.rs | 24 +++ .../src/session_handlers_factory.rs | 13 +- .../loopal-agent-server/src/session_start.rs | 5 + .../tests/suite/hub_harness.rs | 30 +--- .../suite/session_handlers_factory_test.rs | 68 ++++---- .../tests/suite/summarization_default_test.rs | 16 +- crates/loopal-config/src/settings/core.rs | 5 +- .../tests/suite/executor_factory_test.rs | 14 +- crates/loopal-kernel/BUILD.bazel | 1 + crates/loopal-kernel/src/hook_factory.rs | 12 +- crates/loopal-kernel/src/kernel/mod.rs | 1 + crates/loopal-kernel/src/kernel/resolve.rs | 29 ++++ crates/loopal-kernel/src/lib.rs | 2 + crates/loopal-kernel/src/preflight.rs | 53 ++++++ crates/loopal-kernel/tests/suite.rs | 2 + .../tests/suite/kernel_resolve_test.rs | 164 ++++++++++++++++++ crates/loopal-provider-api/src/lib.rs | 2 +- .../loopal-provider-api/src/model_router.rs | 103 +++++++++++ crates/loopal-provider/src/router.rs | 8 + .../tests/suite/router_test.rs | 17 ++ .../src/agent_loop/cold_start_emit.rs | 2 +- .../src/agent_loop/compaction_run.rs | 29 +++- crates/loopal-runtime/src/agent_loop/llm.rs | 14 +- .../src/agent_loop/llm_params.rs | 5 +- .../src/agent_loop/llm_record.rs | 2 +- .../loopal-runtime/src/agent_loop/params.rs | 13 +- .../loopal-runtime/src/agent_loop/runner.rs | 2 +- .../src/agent_loop/turn_recover.rs | 2 +- .../src/agent_loop/turn_telemetry.rs | 5 +- .../compact_provider_error_e2e_test.rs | 66 +++++++ .../tests/agent_loop/context_budget_test.rs | 2 +- .../tests/agent_loop/input_edge_test.rs | 4 +- .../agent_loop/input_emit_fail_edge_test.rs | 2 +- crates/loopal-runtime/tests/agent_loop/mod.rs | 1 + .../tests/agent_loop/model_routing_test.rs | 29 +++- .../tests/agent_loop/retry_cancel_test.rs | 4 +- .../tests/agent_loop/run_test.rs | 2 +- crates/loopal-test-support/src/wiring.rs | 2 +- 41 files changed, 635 insertions(+), 145 deletions(-) create mode 100644 crates/loopal-kernel/src/kernel/resolve.rs create mode 100644 crates/loopal-kernel/src/preflight.rs create mode 100644 crates/loopal-kernel/tests/suite/kernel_resolve_test.rs create mode 100644 crates/loopal-runtime/tests/agent_loop/compact_provider_error_e2e_test.rs diff --git a/crates/loopal-agent-server/src/agent_setup.rs b/crates/loopal-agent-server/src/agent_setup.rs index 5af03089..5e30e2d7 100644 --- a/crates/loopal-agent-server/src/agent_setup.rs +++ b/crates/loopal-agent-server/src/agent_setup.rs @@ -1,6 +1,6 @@ use crate::agent_setup_context::AgentSetupContext; use crate::agent_setup_helpers::{ - build_fork_synthetic_turn, build_microcompact_idle, build_model_router, collect_feature_tags, + build_fork_synthetic_turn, build_microcompact_idle, collect_feature_tags, spawn_sub_agent_forwarder, }; use crate::params::AgentSetupResult; @@ -24,11 +24,9 @@ pub async fn build_with_frontend(ctx: AgentSetupContext<'_>) -> anyhow::Result { pub decision_context: loopal_runtime::frontend::DecisionContext, pub decision_cell: loopal_runtime::frontend::DecisionCell, pub session_id: &'a str, + pub router: SharedModelRouter, } impl<'a> AgentSetupContext<'a> { @@ -57,6 +59,7 @@ impl<'a> AgentSetupContext<'a> { decision_context: loopal_runtime::frontend::DecisionContext, decision_cell: loopal_runtime::frontend::DecisionCell, session_id: &'a str, + router: SharedModelRouter, ) -> Self { Self { cwd, @@ -72,6 +75,7 @@ impl<'a> AgentSetupContext<'a> { decision_context, decision_cell, session_id, + router, } } } diff --git a/crates/loopal-agent-server/src/agent_setup_helpers.rs b/crates/loopal-agent-server/src/agent_setup_helpers.rs index d48a57f9..fdd0bf66 100644 --- a/crates/loopal-agent-server/src/agent_setup_helpers.rs +++ b/crates/loopal-agent-server/src/agent_setup_helpers.rs @@ -6,20 +6,17 @@ use std::time::Duration; use loopal_config::{CompactionSettings, ResolvedConfig, Settings}; use loopal_protocol::{AgentEvent, AgentEventPayload}; -use loopal_provider_api::{ContentBlock, Message, MessageRole, ModelRouter, TaskType}; +use loopal_provider_api::{ContentBlock, Message, MessageRole, ModelRouter}; use loopal_runtime::frontend::traits::AgentFrontend; use loopal_turn::{InjectionKind, Turn, TurnOutcome, TurnStep, TurnTrigger}; use crate::params::StartParams; -const DEFAULT_SUMMARIZATION_MODEL: &str = "claude-haiku-4-5-20251001"; - +/// No hardcoded per-task default: unconfigured tasks fall back to the main +/// model via `ModelRouter::resolve` — a pinned default could reference an +/// unconfigured provider and silently break compaction. pub fn build_model_router(settings: &Settings) -> ModelRouter { - let mut routing = settings.model_routing.clone(); - routing - .entry(TaskType::Summarization) - .or_insert_with(|| DEFAULT_SUMMARIZATION_MODEL.to_string()); - ModelRouter::from_parts(settings.model.clone(), routing) + ModelRouter::from_parts(settings.model.clone(), settings.model_routing.clone()) } /// Resolve the microcompact idle duration. Logs a hint when the user has diff --git a/crates/loopal-agent-server/src/params.rs b/crates/loopal-agent-server/src/params.rs index 68ef3d9e..9764ca53 100644 --- a/crates/loopal-agent-server/src/params.rs +++ b/crates/loopal-agent-server/src/params.rs @@ -17,6 +17,7 @@ pub struct AgentSetupResult { pub agent_shared: Arc, } +#[derive(Default)] pub struct StartParams { #[allow(dead_code)] pub cwd: Option, @@ -97,6 +98,7 @@ pub async fn build_kernel_from_config( ) .await; let kernel = Arc::new(kernel); + log_unresolvable_routing(&kernel); if production { spawn_proxy_mcp_settle_poll(Arc::downgrade(&kernel)); @@ -105,6 +107,28 @@ pub async fn build_kernel_from_config( Ok(kernel) } +/// Logging policy for unresolvable routes: the unreachable main model is an +/// error (LLM calls will fail), a bad `model_routing` entry is a warning (it +/// falls back to the main model). Soft by design — never aborts startup, since +/// sub-agent/proxy and test paths legitimately register fewer providers. +fn log_unresolvable_routing(kernel: &Kernel) { + for entry in kernel.unresolvable_models() { + match entry.slot { + loopal_kernel::RoutedSlot::MainModel => tracing::error!( + model = %entry.model, + "configured main model resolves to no registered provider; \ + LLM calls will fail — configure its provider or set `model`" + ), + loopal_kernel::RoutedSlot::Task(task) => tracing::warn!( + model = %entry.model, + ?task, + "model_routing entry resolves to no registered provider; \ + that task will fail when invoked" + ), + } + } +} + pub fn build_kernel_with_provider( provider: Arc, ) -> anyhow::Result> { diff --git a/crates/loopal-agent-server/src/session_handlers_factory.rs b/crates/loopal-agent-server/src/session_handlers_factory.rs index 130d4fa4..b791faeb 100644 --- a/crates/loopal-agent-server/src/session_handlers_factory.rs +++ b/crates/loopal-agent-server/src/session_handlers_factory.rs @@ -3,7 +3,7 @@ use std::sync::Arc; use loopal_config::ResolvedConfig; use loopal_decision_api::DecisionMode; use loopal_kernel::Kernel; -use loopal_provider_api::{ModelRouter, Provider, ProviderResolver, TaskType}; +use loopal_provider_api::{ModelRouterReader, Provider, ProviderResolver, TaskType}; use loopal_runtime::frontend::permission_handler::PermissionHandler; use loopal_runtime::frontend::question_handler::QuestionHandler; use loopal_runtime::frontend::traits::EventEmitter; @@ -16,7 +16,7 @@ use crate::ipc_handlers::{IpcPermissionHandler, IpcQuestionHandler, SessionRef}; struct KernelProviderResolver { kernel: Arc, - router: ModelRouter, + router: ModelRouterReader, } impl ProviderResolver for KernelProviderResolver { @@ -24,9 +24,7 @@ impl ProviderResolver for KernelProviderResolver { &self, task: TaskType, ) -> Result<(String, Arc), loopal_error::LoopalError> { - let model = self.router.resolve(task).to_string(); - let provider = self.kernel.resolve_provider(&model)?; - Ok((model, provider)) + self.kernel.resolve_task(&self.router.read(), task) } } @@ -35,6 +33,7 @@ pub fn build_session_handlers( kernel: &Arc, session: SessionRef, context: DecisionContext, + router: ModelRouterReader, ) -> ( Box, Box, @@ -61,10 +60,6 @@ pub fn build_session_handlers( )) .with_question_system_prompt(config.classifier_prompt.clone()), ); - let router = ModelRouter::from_parts( - config.settings.model.clone(), - config.settings.model_routing.clone(), - ); let resolver: Arc = Arc::new(KernelProviderResolver { kernel: kernel.clone(), router, diff --git a/crates/loopal-agent-server/src/session_start.rs b/crates/loopal-agent-server/src/session_start.rs index 48c68112..021e0b37 100644 --- a/crates/loopal-agent-server/src/session_start.rs +++ b/crates/loopal-agent-server/src/session_start.rs @@ -10,11 +10,13 @@ use loopal_protocol::InterruptSignal; use loopal_runtime::agent_input::AgentInput; use crate::agent_setup; +use crate::agent_setup_helpers::build_model_router; use crate::hub_frontend::HubFrontend; use crate::session_handlers_factory::build_session_handlers; use crate::session_hub::{SessionHub, SharedSession}; use crate::session_spawn::{parse_start_params, spawn_agent_and_bridges}; use crate::session_start_prompt::push_prompt_envelope; +use loopal_provider_api::SharedModelRouter; pub(crate) struct SessionHandle { pub session_id: String, @@ -94,11 +96,13 @@ pub(crate) async fn start_session( )); let decision_context = loopal_runtime::frontend::DecisionContext::with_cwd(cwd.to_string_lossy().into_owned()); + let model_router = SharedModelRouter::new(build_model_router(&config.settings)); let (perm_handler, q_handler, decision_cell) = build_session_handlers( &config, &kernel, session_holder.clone(), decision_context.clone(), + model_router.reader(), ); let frontend_placeholder = Arc::new(HubFrontend::new( session_holder, @@ -126,6 +130,7 @@ pub(crate) async fn start_session( decision_context, decision_cell, &preset_session_id, + model_router, )) .await?; let agent_params = setup.params; diff --git a/crates/loopal-agent-server/tests/suite/hub_harness.rs b/crates/loopal-agent-server/tests/suite/hub_harness.rs index d1deac73..f9c1233f 100644 --- a/crates/loopal-agent-server/tests/suite/hub_harness.rs +++ b/crates/loopal-agent-server/tests/suite/hub_harness.rs @@ -6,7 +6,7 @@ use std::sync::Arc; use std::time::Duration; -use tokio::sync::{Mutex, mpsc, watch}; +use tokio::sync::{mpsc, watch}; use loopal_error::AgentOutput; use loopal_ipc::StdioTransport; @@ -19,7 +19,7 @@ use loopal_test_support::mock_provider::MultiCallProvider; use loopal_test_support::scenarios::Calls; use loopal_agent_server::testing::{ - AgentInput, SharedSession, StartParams, build_kernel_with_provider, + AgentInput, SharedSession, StartParams, build_kernel_with_provider, build_model_router, }; pub const T: Duration = Duration::from_secs(10); @@ -108,14 +108,11 @@ pub async fn build_hub_harness_with( let (watch_tx, watch_rx) = watch::channel(0u64); let interrupt_tx = Arc::new(watch_tx); - let session = Arc::new(SharedSession { - session_id: "hub-test".into(), - clients: Mutex::new(Vec::new()), - input_tx: input_tx.clone(), - interrupt: interrupt.clone(), - interrupt_tx: interrupt_tx.clone(), - agent_shared: Mutex::new(None), - }); + let session = Arc::new(SharedSession::placeholder( + input_tx.clone(), + interrupt.clone(), + interrupt_tx.clone(), + )); let (server_conn, client_conn, client_rx) = conn_pair(); session.add_client("test".into(), server_conn).await; @@ -125,18 +122,8 @@ pub async fn build_hub_harness_with( config.settings.permission_mode = pm; } let start = StartParams { - cwd: None, - model: None, - mode: None, - prompt: None, - permission_mode: None, - decision_mode: None, no_sandbox: true, - resume: None, - lifecycle: loopal_runtime::LifecycleMode::Persistent, - agent_type: None, - depth: None, - fork_context: None, + ..Default::default() }; let (hub_conn, _hub_peer) = loopal_ipc::duplex_pair(); let (hub_connection, _hub_rx) = loopal_ipc::Connection::new(hub_conn).into_listening(); @@ -157,6 +144,7 @@ pub async fn build_hub_harness_with( loopal_runtime::frontend::DecisionContext::with_cwd("/tmp/test"), loopal_runtime::frontend::DecisionCell::new(loopal_decision_api::DecisionMode::Manual), "harness-session", + loopal_provider_api::SharedModelRouter::new(build_model_router(&config.settings)), ), ) .await diff --git a/crates/loopal-agent-server/tests/suite/session_handlers_factory_test.rs b/crates/loopal-agent-server/tests/suite/session_handlers_factory_test.rs index ecc0dcef..028d5f36 100644 --- a/crates/loopal-agent-server/tests/suite/session_handlers_factory_test.rs +++ b/crates/loopal-agent-server/tests/suite/session_handlers_factory_test.rs @@ -1,11 +1,33 @@ use indexmap::IndexMap; use std::sync::Arc; -use loopal_agent_server::testing::{SessionRef, SharedSession, build_session_handlers}; +use loopal_agent_server::testing::{ + SessionRef, SharedSession, build_model_router, build_session_handlers, +}; use loopal_config::{ResolvedConfig, Settings}; use loopal_decision_api::DecisionMode; use loopal_kernel::Kernel; -use loopal_runtime::frontend::DecisionContext; +use loopal_provider_api::SharedModelRouter; +use loopal_runtime::frontend::permission_handler::PermissionHandler; +use loopal_runtime::frontend::question_handler::QuestionHandler; +use loopal_runtime::frontend::{DecisionCell, DecisionContext}; + +type Handlers = ( + Box, + Box, + DecisionCell, +); + +fn handlers(config: &ResolvedConfig, kernel: &Arc, session: SessionRef) -> Handlers { + let router = SharedModelRouter::new(build_model_router(&config.settings)).reader(); + build_session_handlers( + config, + kernel, + session, + DecisionContext::with_cwd("/tmp/test"), + router, + ) +} fn empty_config(decision: DecisionMode) -> ResolvedConfig { let settings = Settings { @@ -39,12 +61,7 @@ async fn manual_decision_yields_ipc_only_no_primary_connection() { let config = empty_config(DecisionMode::Manual); let kernel = Arc::new(Kernel::new(Settings::default()).unwrap()); let session = dummy_session(); - let (perm, _q, _cell) = build_session_handlers( - &config, - &kernel, - session, - DecisionContext::with_cwd("/tmp/test"), - ); + let (perm, _q, _cell) = handlers(&config, &kernel, session); let outcome = perm.decide("id1", "Bash", &serde_json::json!({})).await; assert_eq!( outcome.decision, @@ -63,12 +80,7 @@ async fn auto_decision_wraps_with_auto_handlers_and_falls_back() { let config = empty_config(DecisionMode::Classifier); let kernel = Arc::new(Kernel::new(Settings::default()).unwrap()); let session = dummy_session(); - let (perm, _q, _cell) = build_session_handlers( - &config, - &kernel, - session, - DecisionContext::with_cwd("/tmp/test"), - ); + let (perm, _q, _cell) = handlers(&config, &kernel, session); let outcome = perm.decide("id1", "Bash", &serde_json::json!({})).await; assert_eq!( outcome.decision, @@ -92,12 +104,7 @@ async fn manual_question_path_cancels_without_connection() { let config = empty_config(DecisionMode::Manual); let kernel = Arc::new(Kernel::new(Settings::default()).unwrap()); let session = dummy_session(); - let (_perm, q, _cell) = build_session_handlers( - &config, - &kernel, - session, - DecisionContext::with_cwd("/tmp/test"), - ); + let (_perm, q, _cell) = handlers(&config, &kernel, session); let outcome = q.ask(vec![]).await; assert!( matches!( @@ -118,12 +125,7 @@ async fn auto_question_path_chains_fallback_when_provider_unresolvable() { let config = empty_config(DecisionMode::Classifier); let kernel = Arc::new(Kernel::new(Settings::default()).unwrap()); let session = dummy_session(); - let (_perm, q, _cell) = build_session_handlers( - &config, - &kernel, - session, - DecisionContext::with_cwd("/tmp/test"), - ); + let (_perm, q, _cell) = handlers(&config, &kernel, session); let outcome = q.ask(vec![]).await; assert!( matches!( @@ -144,12 +146,7 @@ async fn decision_cell_switch_flips_manual_to_classifier_at_runtime() { let config = empty_config(DecisionMode::Manual); let kernel = Arc::new(Kernel::new(Settings::default()).unwrap()); let session = dummy_session(); - let (perm, _q, cell) = build_session_handlers( - &config, - &kernel, - session, - DecisionContext::with_cwd("/tmp/test"), - ); + let (perm, _q, cell) = handlers(&config, &kernel, session); let manual = perm.decide("id1", "Bash", &serde_json::json!({})).await; assert!( manual.reason.contains("no primary connection"), @@ -172,12 +169,7 @@ async fn agent_decision_falls_back_to_classifier_path_today() { let config = empty_config(DecisionMode::Agent); let kernel = Arc::new(Kernel::new(Settings::default()).unwrap()); let session = dummy_session(); - let (perm, q, _cell) = build_session_handlers( - &config, - &kernel, - session, - DecisionContext::with_cwd("/tmp/test"), - ); + let (perm, q, _cell) = handlers(&config, &kernel, session); // Permission path: Agent → Classifier → IpcPermission fallback denies (no conn) let outcome = perm.decide("id1", "Bash", &serde_json::json!({})).await; assert_eq!( diff --git a/crates/loopal-agent-server/tests/suite/summarization_default_test.rs b/crates/loopal-agent-server/tests/suite/summarization_default_test.rs index 6c04b2bb..1c733765 100644 --- a/crates/loopal-agent-server/tests/suite/summarization_default_test.rs +++ b/crates/loopal-agent-server/tests/suite/summarization_default_test.rs @@ -4,8 +4,6 @@ use loopal_agent_server::testing::build_model_router; use loopal_config::Settings; use loopal_provider_api::TaskType; -const HAIKU: &str = "claude-haiku-4-5-20251001"; - fn settings_with(model: &str, routing: HashMap) -> Settings { Settings { model: model.into(), @@ -15,9 +13,9 @@ fn settings_with(model: &str, routing: HashMap) -> Settings { } #[test] -fn summarization_defaults_to_haiku_when_unconfigured() { +fn summarization_falls_back_to_main_model_when_unconfigured() { let router = build_model_router(&settings_with("claude-opus-4-7", HashMap::new())); - assert_eq!(router.resolve(TaskType::Summarization), HAIKU); + assert_eq!(router.resolve(TaskType::Summarization), "claude-opus-4-7"); } #[test] @@ -29,7 +27,7 @@ fn user_summarization_model_overrides_default() { } #[test] -fn default_task_unaffected_by_summarization_injection() { +fn default_task_resolves_to_main_model() { let router = build_model_router(&settings_with("claude-opus-4-7", HashMap::new())); assert_eq!(router.resolve(TaskType::Default), "claude-opus-4-7"); } @@ -42,8 +40,8 @@ fn other_task_types_fall_back_to_default_model() { } #[test] -fn empty_routing_still_yields_haiku_for_summarization() { - let router = build_model_router(&settings_with("anthropic-test-model", HashMap::new())); - assert_eq!(router.resolve(TaskType::Summarization), HAIKU); - assert_eq!(router.resolve(TaskType::Default), "anthropic-test-model"); +fn gpt_only_deployment_summarizes_with_its_main_model() { + let router = build_model_router(&settings_with("gpt-5.5", HashMap::new())); + assert_eq!(router.resolve(TaskType::Summarization), "gpt-5.5"); + assert_eq!(router.resolve(TaskType::Default), "gpt-5.5"); } diff --git a/crates/loopal-config/src/settings/core.rs b/crates/loopal-config/src/settings/core.rs index 66e0f888..fc6fcf5a 100644 --- a/crates/loopal-config/src/settings/core.rs +++ b/crates/loopal-config/src/settings/core.rs @@ -15,6 +15,9 @@ use loopal_decision_api::DecisionMode; use loopal_provider_api::{ModelOverride, TaskType, ThinkingConfig}; use loopal_tool_api::{BgTaskConfig, PermissionMode}; +/// Fallback model id when no config layer sets `model`. +const DEFAULT_MODEL: &str = "claude-opus-4-8"; + #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(default)] pub struct Settings { @@ -79,7 +82,7 @@ pub struct Settings { impl Default for Settings { fn default() -> Self { Self { - model: "claude-opus-4-8".to_string(), + model: DEFAULT_MODEL.to_string(), model_routing: HashMap::new(), models: HashMap::new(), permission_mode: PermissionMode::default(), diff --git a/crates/loopal-hooks/tests/suite/executor_factory_test.rs b/crates/loopal-hooks/tests/suite/executor_factory_test.rs index 236b3241..c4b7a2c5 100644 --- a/crates/loopal-hooks/tests/suite/executor_factory_test.rs +++ b/crates/loopal-hooks/tests/suite/executor_factory_test.rs @@ -60,12 +60,24 @@ async fn test_factory_prompt_with_provider_creates_executor() { ]; let provider: Arc = Arc::new(MockProvider::new(chunks)); let factory = DefaultExecutorFactory::new(Some(provider)); - let executor = factory.create(&make_config(HookType::Prompt)).unwrap(); + // Prompt hooks now require an explicit model (no cross-provider default). + let mut config = make_config(HookType::Prompt); + config.model = Some("claude-sonnet-4-20250514".into()); + let executor = factory.create(&config).unwrap(); let result = executor.execute(serde_json::json!({})).await; assert!(result.is_ok()); assert_eq!(result.unwrap().exit_code, 0); } +#[tokio::test] +async fn test_factory_prompt_without_model_returns_none() { + use loopal_test_support::mock_provider::MockProvider; + let provider: Arc = Arc::new(MockProvider::new(vec![])); + let factory = DefaultExecutorFactory::new(Some(provider)); + // Provider present but `model` unset → no executor (no cross-provider default). + assert!(factory.create(&make_config(HookType::Prompt)).is_none()); +} + #[tokio::test] async fn test_factory_http_missing_url_returns_none() { let factory = DefaultExecutorFactory::new(None); diff --git a/crates/loopal-kernel/BUILD.bazel b/crates/loopal-kernel/BUILD.bazel index 2b6d313a..2649caab 100644 --- a/crates/loopal-kernel/BUILD.bazel +++ b/crates/loopal-kernel/BUILD.bazel @@ -39,6 +39,7 @@ rust_test( "//crates/loopal-error", "//crates/loopal-mcp", "//crates/loopal-provider", + "//crates/loopal-provider-api", "//crates/loopal-tool-api", "@crates//:rmcp", "@crates//:serde_json", diff --git a/crates/loopal-kernel/src/hook_factory.rs b/crates/loopal-kernel/src/hook_factory.rs index d54cb2a4..2586cf71 100644 --- a/crates/loopal-kernel/src/hook_factory.rs +++ b/crates/loopal-kernel/src/hook_factory.rs @@ -54,12 +54,16 @@ impl ExecutorFactory for DefaultExecutorFactory { error!("Prompt hook requires a Provider but none available, skipping"); return None; }; + // No cross-provider default model: an unset `model` is a config + // error, not a silent fallback to some provider's model that may + // not even be registered. + let Some(model) = config.model.clone() else { + error!("Prompt hook requires an explicit `model`; none set, skipping"); + return None; + }; Some(Box::new(PromptExecutor { system_prompt: config.prompt.clone().unwrap_or_default(), - model: config - .model - .clone() - .unwrap_or_else(|| "claude-haiku-4-5-20251001".into()), + model, provider: provider.clone(), timeout, max_tokens: 256, diff --git a/crates/loopal-kernel/src/kernel/mod.rs b/crates/loopal-kernel/src/kernel/mod.rs index ca4916fa..33eddd6b 100644 --- a/crates/loopal-kernel/src/kernel/mod.rs +++ b/crates/loopal-kernel/src/kernel/mod.rs @@ -1,5 +1,6 @@ mod backend; mod mcp; +mod resolve; use std::sync::Arc; diff --git a/crates/loopal-kernel/src/kernel/resolve.rs b/crates/loopal-kernel/src/kernel/resolve.rs new file mode 100644 index 00000000..d15e2974 --- /dev/null +++ b/crates/loopal-kernel/src/kernel/resolve.rs @@ -0,0 +1,29 @@ +use std::sync::Arc; + +use loopal_error::LoopalError; +use loopal_provider_api::{ModelRouter, Provider, TaskType}; + +use super::Kernel; + +impl Kernel { + /// Single chokepoint from a `TaskType` to a usable `(model, provider)` + /// pair: route the task through `router`, then resolve the provider. + /// Errors loudly when the routed model has no registered provider — + /// callers must not collapse that into a silent no-op. + pub fn resolve_task( + &self, + router: &ModelRouter, + task: TaskType, + ) -> std::result::Result<(String, Arc), LoopalError> { + let model = router.resolve(task).to_string(); + let provider = self.resolve_provider(&model)?; + Ok((model, provider)) + } + + /// The configured models (`settings.model` + `model_routing`) that resolve + /// to no registered provider. Pure mechanism — the caller owns the logging + /// severity and any decision to act on it. + pub fn unresolvable_models(&self) -> Vec { + crate::preflight::unresolvable_models(&self.settings, &self.provider_registry) + } +} diff --git a/crates/loopal-kernel/src/lib.rs b/crates/loopal-kernel/src/lib.rs index 7f70e392..8b8711c5 100644 --- a/crates/loopal-kernel/src/lib.rs +++ b/crates/loopal-kernel/src/lib.rs @@ -1,10 +1,12 @@ pub mod bg_gc; pub mod hook_factory; pub mod kernel; +pub mod preflight; pub mod provider_registry; pub mod sampling; pub use bg_gc::spawn_bg_gc_tick; pub use kernel::Kernel; +pub use preflight::{RoutedSlot, UnresolvableModel, unresolvable_models}; pub use provider_registry::{register_providers, resolve_api_key}; pub use sampling::McpSamplingAdapter; diff --git a/crates/loopal-kernel/src/preflight.rs b/crates/loopal-kernel/src/preflight.rs new file mode 100644 index 00000000..d5a5af6c --- /dev/null +++ b/crates/loopal-kernel/src/preflight.rs @@ -0,0 +1,53 @@ +use loopal_config::Settings; +use loopal_provider::ProviderRegistry; +use loopal_provider_api::TaskType; + +/// Which routing slot a model came from. Distinguishes the base `settings.model` +/// from a per-task `model_routing` entry without overloading `Option`. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum RoutedSlot { + MainModel, + Task(TaskType), +} + +/// A configured model that resolves to no registered provider. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct UnresolvableModel { + pub slot: RoutedSlot, + pub model: String, +} + +/// Collect every configured model (`settings.model` plus each `model_routing` +/// value) that resolves to no registered provider. Pure over its inputs so it +/// is unit-testable without a full Kernel; callers decide how loudly to report. +pub fn unresolvable_models( + settings: &Settings, + registry: &ProviderRegistry, +) -> Vec { + let mut out = Vec::new(); + // Effective main model: a `model_routing[Default]` override wins over + // `settings.model` (mirrors `ModelRouter::resolve`), so validate that — else + // a resolvable override would false-flag the (unused) overridden base. + let main = settings + .model_routing + .get(&TaskType::Default) + .unwrap_or(&settings.model); + if !registry.is_model_resolvable(main) { + out.push(UnresolvableModel { + slot: RoutedSlot::MainModel, + model: main.clone(), + }); + } + for (task, model) in &settings.model_routing { + if *task == TaskType::Default { + continue; // folded into the main-model check above + } + if !registry.is_model_resolvable(model) { + out.push(UnresolvableModel { + slot: RoutedSlot::Task(*task), + model: model.clone(), + }); + } + } + out +} diff --git a/crates/loopal-kernel/tests/suite.rs b/crates/loopal-kernel/tests/suite.rs index 424097a0..8b020160 100644 --- a/crates/loopal-kernel/tests/suite.rs +++ b/crates/loopal-kernel/tests/suite.rs @@ -11,5 +11,7 @@ mod kernel_mcp_test; mod kernel_openai_test; #[path = "suite/kernel_provider_registry_test.rs"] mod kernel_provider_registry_test; +#[path = "suite/kernel_resolve_test.rs"] +mod kernel_resolve_test; #[path = "suite/kernel_sandbox_test.rs"] mod kernel_sandbox_test; diff --git a/crates/loopal-kernel/tests/suite/kernel_resolve_test.rs b/crates/loopal-kernel/tests/suite/kernel_resolve_test.rs new file mode 100644 index 00000000..6476e852 --- /dev/null +++ b/crates/loopal-kernel/tests/suite/kernel_resolve_test.rs @@ -0,0 +1,164 @@ +use std::collections::HashMap; +use std::sync::Arc; + +use loopal_config::{ProviderConfig, ProvidersConfig, Settings}; +use loopal_kernel::{Kernel, RoutedSlot, unresolvable_models}; +use loopal_provider::{OpenAiProvider, ProviderRegistry}; +use loopal_provider_api::{ModelRouter, SharedModelRouter, TaskType}; + +fn openai_kernel(model: &str, routing: HashMap) -> Kernel { + let settings = Settings { + model: model.into(), + model_routing: routing, + providers: ProvidersConfig { + anthropic: None, + openai: Some(ProviderConfig { + api_key: Some("test-openai-key".into()), + api_key_env: None, + base_url: None, + }), + google: None, + openai_compat: vec![], + }, + ..Default::default() + }; + Kernel::new(settings).unwrap() +} + +#[test] +fn resolve_task_summarization_falls_back_to_main_model() { + let kernel = openai_kernel("gpt-5.5", HashMap::new()); + let router = ModelRouter::from_parts("gpt-5.5".into(), HashMap::new()); + let (model, _provider) = kernel + .resolve_task(&router, TaskType::Summarization) + .expect("summarization falls back to the resolvable main model"); + assert_eq!(model, "gpt-5.5"); +} + +#[test] +fn resolve_task_errors_when_routed_model_has_no_provider() { + let mut routing = HashMap::new(); + routing.insert(TaskType::Summarization, "mystery-model-9000".into()); + let kernel = openai_kernel("gpt-5.5", routing.clone()); + let router = ModelRouter::from_parts("gpt-5.5".into(), routing); + assert!( + kernel + .resolve_task(&router, TaskType::Summarization) + .is_err() + ); +} + +#[test] +fn resolve_task_uses_explicit_override_when_provider_present() { + let mut routing = HashMap::new(); + routing.insert(TaskType::Summarization, "gpt-4o".into()); + let kernel = openai_kernel("gpt-5.5", routing.clone()); + let router = ModelRouter::from_parts("gpt-5.5".into(), routing); + let (model, _) = kernel + .resolve_task(&router, TaskType::Summarization) + .unwrap(); + assert_eq!(model, "gpt-4o"); +} + +#[test] +fn unresolvable_models_flags_only_the_anthropic_route() { + let mut routing = HashMap::new(); + routing.insert(TaskType::Summarization, "claude-haiku-4-5-20251001".into()); + routing.insert(TaskType::Refine, "gpt-4o".into()); + let settings = Settings { + model: "gpt-5.5".into(), + model_routing: routing, + ..Default::default() + }; + let mut registry = ProviderRegistry::new(); + registry.register(Arc::new(OpenAiProvider::new("k".into()))); + + let flagged = unresolvable_models(&settings, ®istry); + assert_eq!(flagged.len(), 1); + assert_eq!(flagged[0].slot, RoutedSlot::Task(TaskType::Summarization)); + assert_eq!(flagged[0].model, "claude-haiku-4-5-20251001"); +} + +#[test] +fn unresolvable_models_flags_unreachable_main_model() { + let settings = Settings { + model: "claude-opus-4-8".into(), + model_routing: HashMap::new(), + ..Default::default() + }; + let mut registry = ProviderRegistry::new(); + registry.register(Arc::new(OpenAiProvider::new("k".into()))); + + let flagged = unresolvable_models(&settings, ®istry); + assert_eq!(flagged.len(), 1); + assert_eq!(flagged[0].slot, RoutedSlot::MainModel); + assert_eq!(flagged[0].model, "claude-opus-4-8"); +} + +#[test] +fn unresolvable_models_empty_when_all_resolvable() { + let settings = Settings { + model: "gpt-5.5".into(), + model_routing: HashMap::new(), + ..Default::default() + }; + let mut registry = ProviderRegistry::new(); + registry.register(Arc::new(OpenAiProvider::new("k".into()))); + + assert!(unresolvable_models(&settings, ®istry).is_empty()); +} + +#[test] +fn resolve_task_via_reader_observes_model_switch() { + // The classifier resolves through a ModelRouterReader sharing the runner's + // router; a mid-session /model switch on the writer must be observed here — + // proving the classifier-stale fix end-to-end through the resolve chokepoint. + let kernel = openai_kernel("gpt-5.5", HashMap::new()); + let writer = SharedModelRouter::with_default("gpt-5.5".into()); + let reader = writer.reader(); + + let (before, _) = kernel + .resolve_task(&reader.read(), TaskType::Default) + .unwrap(); + assert_eq!(before, "gpt-5.5"); + + writer.set_default("gpt-4o".into()); + let (after, _) = kernel + .resolve_task(&reader.read(), TaskType::Default) + .unwrap(); + assert_eq!( + after, "gpt-4o", + "reader must see the writer's /model switch" + ); +} + +#[test] +fn kernel_unresolvable_models_reports_via_own_settings() { + // Env-independent: an unknown model resolves to openai_compat, which is + // never auto-registered (unlike claude-*, which an ANTHROPIC_API_KEY in the + // environment would silently make resolvable). + let mut routing = HashMap::new(); + routing.insert(TaskType::Summarization, "mystery-model-9000".into()); + let kernel = openai_kernel("gpt-5.5", routing); + let flagged = kernel.unresolvable_models(); + assert_eq!(flagged.len(), 1); + assert_eq!(flagged[0].slot, RoutedSlot::Task(TaskType::Summarization)); + assert_eq!(flagged[0].model, "mystery-model-9000"); +} + +#[test] +fn unresolvable_models_respects_resolvable_default_override() { + // Base model is anthropic (unregistered) but model_routing.default overrides + // it with a registered openai model — the unused base must NOT be flagged. + let mut routing = HashMap::new(); + routing.insert(TaskType::Default, "gpt-5.5".into()); + let settings = Settings { + model: "claude-opus-4-8".into(), + model_routing: routing, + ..Default::default() + }; + let mut registry = ProviderRegistry::new(); + registry.register(Arc::new(OpenAiProvider::new("k".into()))); + + assert!(unresolvable_models(&settings, ®istry).is_empty()); +} diff --git a/crates/loopal-provider-api/src/lib.rs b/crates/loopal-provider-api/src/lib.rs index 413a42f4..4d157665 100644 --- a/crates/loopal-provider-api/src/lib.rs +++ b/crates/loopal-provider-api/src/lib.rs @@ -12,7 +12,7 @@ use loopal_error::LoopalError; pub use chat::{ChatParams, ChatStream, StopReason, StreamChunk}; pub use model::*; -pub use model_router::ModelRouter; +pub use model_router::{ModelRouter, ModelRouterReader, SharedModelRouter}; pub use resolver::ProviderResolver; pub use thinking::*; pub use wire::{ diff --git a/crates/loopal-provider-api/src/model_router.rs b/crates/loopal-provider-api/src/model_router.rs index 35ed560b..f5292673 100644 --- a/crates/loopal-provider-api/src/model_router.rs +++ b/crates/loopal-provider-api/src/model_router.rs @@ -1,4 +1,5 @@ use std::collections::HashMap; +use std::sync::{Arc, RwLock, RwLockReadGuard}; use crate::TaskType; @@ -46,3 +47,105 @@ impl ModelRouter { self.default_model = model; } } + +/// Shared, interior-mutable handle to a single `ModelRouter` instance. +/// +/// One `SharedModelRouter` is the single source of truth per agent: the agent +/// loop's `/model` switch writes through it, and every resolver (compaction, +/// classifier) reads through the same `Arc`, so a mid-session model change is +/// observed everywhere instead of leaving stale clones behind. +#[derive(Clone)] +pub struct SharedModelRouter(Arc>); + +impl SharedModelRouter { + pub fn new(router: ModelRouter) -> Self { + Self(Arc::new(RwLock::new(router))) + } + + pub fn from_parts(default_model: String, routing: HashMap) -> Self { + Self::new(ModelRouter::from_parts(default_model, routing)) + } + + pub fn with_default(default_model: String) -> Self { + Self::new(ModelRouter::new(default_model)) + } + + /// Borrow the inner router for a synchronous read (e.g. to pass to + /// `Kernel::resolve_task`). The guard must not be held across `.await`. + pub fn read(&self) -> RwLockReadGuard<'_, ModelRouter> { + self.0.read().expect("model router lock poisoned") + } + + pub fn set_default(&self, model: String) { + self.0 + .write() + .expect("model router lock poisoned") + .set_default(model); + } + + pub fn model(&self) -> String { + self.read().resolve(TaskType::Default).to_string() + } + + /// A read-only handle onto the *same* router instance. Hand this to + /// resolvers that must observe `/model` switches but must never write — + /// the write capability stays with the agent loop's `SharedModelRouter`. + pub fn reader(&self) -> ModelRouterReader { + ModelRouterReader(self.0.clone()) + } +} + +/// Read-only view of a shared `ModelRouter`. Shares the underlying `Arc` with +/// the owning `SharedModelRouter`, so it always sees the latest `/model` +/// switch, but exposes no mutator — single-writer is enforced by type. +#[derive(Clone)] +pub struct ModelRouterReader(Arc>); + +impl ModelRouterReader { + pub fn read(&self) -> RwLockReadGuard<'_, ModelRouter> { + self.0.read().expect("model router lock poisoned") + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn shared_router_clones_observe_set_default() { + let a = SharedModelRouter::with_default("model-a".into()); + let b = a.clone(); + assert_eq!(a.model(), "model-a"); + assert_eq!(b.model(), "model-a"); + // Mutating through one clone is visible through the other: single + // source of truth, no stale copies (the classifier-stale fix). + b.set_default("model-b".into()); + assert_eq!(a.model(), "model-b"); + assert_eq!(b.read().resolve(TaskType::Default), "model-b"); + } + + #[test] + fn read_only_handle_observes_writer_set_default() { + let writer = SharedModelRouter::with_default("m1".into()); + let reader = writer.reader(); + assert_eq!(reader.read().resolve(TaskType::Default), "m1"); + // The runner writes via `set_default`; the classifier's read-only + // handle sees it (same Arc) without ever holding write capability. + writer.set_default("m2".into()); + assert_eq!(reader.read().resolve(TaskType::Default), "m2"); + } + + #[test] + fn from_parts_carries_routing_and_with_default_falls_back() { + let mut routing = std::collections::HashMap::new(); + routing.insert(TaskType::Summarization, "sum-model".into()); + let r = SharedModelRouter::from_parts("main".into(), routing); + assert_eq!(r.model(), "main"); + assert_eq!(r.read().resolve(TaskType::Summarization), "sum-model"); + + let d = SharedModelRouter::with_default("only".into()); + assert_eq!(d.model(), "only"); + // No overrides → every task falls back to the default model. + assert_eq!(d.read().resolve(TaskType::Summarization), "only"); + } +} diff --git a/crates/loopal-provider/src/router.rs b/crates/loopal-provider/src/router.rs index 17dab4a5..f39a9f79 100644 --- a/crates/loopal-provider/src/router.rs +++ b/crates/loopal-provider/src/router.rs @@ -72,6 +72,14 @@ impl ProviderRegistry { pub fn get(&self, name: &str) -> Option> { self.providers.get(name).cloned() } + + /// Whether `model` resolves to a registered provider. Keyed by model + /// (catalog → prefix-map → heuristic), not by provider name, so it + /// mirrors what `resolve` would actually return. Used by preflight to + /// surface dead routes before they fail at call time. + pub fn is_model_resolvable(&self, model: &str) -> bool { + self.resolve(model).is_ok() + } } impl Default for ProviderRegistry { diff --git a/crates/loopal-provider/tests/suite/router_test.rs b/crates/loopal-provider/tests/suite/router_test.rs index 6df07b0f..389aaecc 100644 --- a/crates/loopal-provider/tests/suite/router_test.rs +++ b/crates/loopal-provider/tests/suite/router_test.rs @@ -150,3 +150,20 @@ fn test_prefix_takes_priority_over_heuristic() { let provider = registry.resolve("llama-3").unwrap(); assert_eq!(provider.name(), "ollama"); } + +// -- is_model_resolvable --------------------------------------------------- + +#[test] +fn is_model_resolvable_true_for_registered_provider() { + let mut registry = ProviderRegistry::new(); + registry.register(Arc::new(MockProvider::new("openai"))); + assert!(registry.is_model_resolvable("gpt-4o")); +} + +#[test] +fn is_model_resolvable_false_when_routed_provider_absent() { + let mut registry = ProviderRegistry::new(); + registry.register(Arc::new(MockProvider::new("openai"))); + // claude → anthropic, which is not registered. + assert!(!registry.is_model_resolvable("claude-sonnet-4-20250514")); +} diff --git a/crates/loopal-runtime/src/agent_loop/cold_start_emit.rs b/crates/loopal-runtime/src/agent_loop/cold_start_emit.rs index 10a44725..2a61d170 100644 --- a/crates/loopal-runtime/src/agent_loop/cold_start_emit.rs +++ b/crates/loopal-runtime/src/agent_loop/cold_start_emit.rs @@ -6,7 +6,7 @@ use super::runner::AgentLoopRunner; impl AgentLoopRunner { pub(super) async fn emit_cold_start_observables(&self) -> Result<()> { self.emit(AgentEventPayload::ModelChanged { - model: self.params.config.model().to_string(), + model: self.params.config.model(), }) .await?; self.emit(AgentEventPayload::ModeChanged { diff --git a/crates/loopal-runtime/src/agent_loop/compaction_run.rs b/crates/loopal-runtime/src/agent_loop/compaction_run.rs index 0cdde7c2..87c35ec6 100644 --- a/crates/loopal-runtime/src/agent_loop/compaction_run.rs +++ b/crates/loopal-runtime/src/agent_loop/compaction_run.rs @@ -25,16 +25,27 @@ impl AgentLoopRunner { strategy: &'static str, cancel: &CancellationToken, ) -> Result { - let compact_model = self - .params - .config - .router - .resolve(loopal_provider_api::TaskType::Summarization) - .to_string(); - let provider = match self.params.deps.kernel.resolve_provider(&compact_model) { - Ok(p) => p, + let resolved = { + let router = self.params.config.router.read(); + self.params + .deps + .kernel + .resolve_task(&router, loopal_provider_api::TaskType::Summarization) + }; + let (compact_model, provider) = match resolved { + Ok(pair) => pair, Err(e) => { - warn!(error = %e, model = %compact_model, "summarization provider unavailable"); + warn!(error = %e, "summarization provider unavailable"); + // Inline notice, best-effort — NOT AgentEventPayload::Error, + // which frontends treat as agent termination (snapping a viewed + // sub-agent back to root every auto-compact tick). + self.emit_cosmetic(AgentEventPayload::Stream { + text: format!( + "[compaction unavailable: summarization model has no provider ({e}); \ + set model_routing.summarization or configure the provider]\n" + ), + }) + .await; return Ok(false); } }; diff --git a/crates/loopal-runtime/src/agent_loop/llm.rs b/crates/loopal-runtime/src/agent_loop/llm.rs index 79bd7d66..fc675b1e 100644 --- a/crates/loopal-runtime/src/agent_loop/llm.rs +++ b/crates/loopal-runtime/src/agent_loop/llm.rs @@ -34,11 +34,8 @@ impl AgentLoopRunner { ) .await?; } - let provider = self - .params - .deps - .kernel - .resolve_provider(self.params.config.model())?; + let model = self.params.config.model(); + let provider = self.params.deps.kernel.resolve_provider(&model)?; // PreRequest hook: notify before LLM call. crate::fire_hooks::fire_hooks( @@ -53,7 +50,7 @@ impl AgentLoopRunner { let llm_start = Instant::now(); info!( - model = %self.params.config.model(), turns = chat_params.turns.len(), + model = %model, turns = chat_params.turns.len(), tools = chat_params.tools.len(), max_tokens = chat_params.max_tokens, thinking = ?chat_params.thinking, "LLM request" ); @@ -105,10 +102,7 @@ impl AgentLoopRunner { "LLM complete" ); let llm_attrs = &[ - KeyValue::new( - "gen_ai.request.model", - self.params.config.model().to_string(), - ), + KeyValue::new("gen_ai.request.model", model), KeyValue::new("gen_ai.system", provider.name().to_string()), ]; crate::otel_metrics::llm_duration().record(llm_duration.as_secs_f64(), llm_attrs); diff --git a/crates/loopal-runtime/src/agent_loop/llm_params.rs b/crates/loopal-runtime/src/agent_loop/llm_params.rs index c64f22fb..4eb0030f 100644 --- a/crates/loopal-runtime/src/agent_loop/llm_params.rs +++ b/crates/loopal-runtime/src/agent_loop/llm_params.rs @@ -56,11 +56,12 @@ impl AgentLoopRunner { .budget() .clamp_output_tokens(estimated_input); - let capability = get_thinking_capability(self.params.config.model()); + let model = self.params.config.model(); + let capability = get_thinking_capability(&model); let resolved_thinking = resolve_thinking_config(&self.model_config.thinking, capability, safe_max_tokens); Ok(ChatParams { - model: self.params.config.model().to_string(), + model, turns, system_prompt: full_system_prompt, tools: tool_defs, diff --git a/crates/loopal-runtime/src/agent_loop/llm_record.rs b/crates/loopal-runtime/src/agent_loop/llm_record.rs index fc85dfff..3b3ee4d0 100644 --- a/crates/loopal-runtime/src/agent_loop/llm_record.rs +++ b/crates/loopal-runtime/src/agent_loop/llm_record.rs @@ -30,7 +30,7 @@ impl AgentLoopRunner { return; } let step = build_llm_call_step( - self.params.config.model(), + &self.params.config.model(), assistant_text, tool_uses, &server_blocks, diff --git a/crates/loopal-runtime/src/agent_loop/params.rs b/crates/loopal-runtime/src/agent_loop/params.rs index 0d18c973..c5df2c49 100644 --- a/crates/loopal-runtime/src/agent_loop/params.rs +++ b/crates/loopal-runtime/src/agent_loop/params.rs @@ -5,7 +5,7 @@ use loopal_config::HarnessConfig; use loopal_context::ContextBudget; use loopal_kernel::Kernel; use loopal_protocol::InterruptSignal; -use loopal_provider_api::{ModelRouter, ThinkingConfig}; +use loopal_provider_api::{SharedModelRouter, ThinkingConfig}; use loopal_storage::Session; use loopal_tool_api::{FetchRefinerPolicy, MemoryChannel, OneShotChatService, PermissionMode}; use tokio::sync::watch; @@ -25,7 +25,7 @@ pub enum LifecycleMode { pub struct AgentConfig { pub lifecycle: LifecycleMode, - pub router: ModelRouter, + pub router: SharedModelRouter, pub system_prompt: String, pub mode: AgentMode, pub permission_mode: PermissionMode, @@ -46,8 +46,8 @@ pub struct PlanModeState { } impl AgentConfig { - pub fn model(&self) -> &str { - self.router.resolve(loopal_provider_api::TaskType::Default) + pub fn model(&self) -> String { + self.router.model() } } @@ -55,7 +55,10 @@ impl Default for AgentConfig { fn default() -> Self { Self { lifecycle: LifecycleMode::default(), - router: ModelRouter::new("claude-sonnet-4-20250514".into()), + // reason: test-only fixture (production sets the real router in + // agent_setup). Pinned to a specific model so token-math tests stay + // calibrated — NOT coupled to the production default model. + router: SharedModelRouter::with_default("claude-sonnet-4-20250514".into()), system_prompt: String::new(), mode: AgentMode::Act, permission_mode: PermissionMode::Bypass, diff --git a/crates/loopal-runtime/src/agent_loop/runner.rs b/crates/loopal-runtime/src/agent_loop/runner.rs index fc02ee89..a1f1284b 100644 --- a/crates/loopal-runtime/src/agent_loop/runner.rs +++ b/crates/loopal-runtime/src/agent_loop/runner.rs @@ -77,7 +77,7 @@ impl AgentLoopRunner { .with_goal_session_opt(goal_adapter) .with_secret_client_opt(params.deps.kernel.secret_client().cloned()); let model_config = ModelConfig::from_model( - params.config.model(), + ¶ms.config.model(), params.config.thinking_config.clone(), params.config.context_tokens_cap, ); diff --git a/crates/loopal-runtime/src/agent_loop/turn_recover.rs b/crates/loopal-runtime/src/agent_loop/turn_recover.rs index 73b7a8da..02805371 100644 --- a/crates/loopal-runtime/src/agent_loop/turn_recover.rs +++ b/crates/loopal-runtime/src/agent_loop/turn_recover.rs @@ -12,7 +12,7 @@ impl AgentLoopRunner { .params .deps .kernel - .resolve_provider(self.params.config.model()) + .resolve_provider(&self.params.config.model()) { Ok(provider) => provider.classify_error(err), Err(_) => default_classify_error(err), diff --git a/crates/loopal-runtime/src/agent_loop/turn_telemetry.rs b/crates/loopal-runtime/src/agent_loop/turn_telemetry.rs index 6c5c38fa..9aed91bb 100644 --- a/crates/loopal-runtime/src/agent_loop/turn_telemetry.rs +++ b/crates/loopal-runtime/src/agent_loop/turn_telemetry.rs @@ -68,10 +68,7 @@ impl AgentLoopRunner { ); crate::otel_metrics::active_turns().add(-1, &[]); - let model_attr = KeyValue::new( - "gen_ai.request.model", - self.params.config.model().to_string(), - ); + let model_attr = KeyValue::new("gen_ai.request.model", self.params.config.model()); crate::otel_metrics::turn_duration().record( turn_duration.as_secs_f64(), std::slice::from_ref(&model_attr), diff --git a/crates/loopal-runtime/tests/agent_loop/compact_provider_error_e2e_test.rs b/crates/loopal-runtime/tests/agent_loop/compact_provider_error_e2e_test.rs new file mode 100644 index 00000000..82d6b8b1 --- /dev/null +++ b/crates/loopal-runtime/tests/agent_loop/compact_provider_error_e2e_test.rs @@ -0,0 +1,66 @@ +use loopal_protocol::AgentEventPayload; +use loopal_provider_api::Message; +use loopal_test_support::{HarnessBuilder, chunks}; + +fn five_user_messages() -> Vec { + (1..=5) + .map(|i| Message::user(&format!("turn {i} content"))) + .collect() +} + +fn saw_unavailable_notice(evts: &[AgentEventPayload]) -> bool { + evts.iter().any(|e| { + matches!(e, AgentEventPayload::Stream { text } + if text.contains("compaction unavailable")) + }) +} + +#[tokio::test] +async fn force_compact_surfaces_inline_notice_when_summarization_provider_missing() { + // Main model resolves (anthropic mock), but the summarization route points + // at a GPT model whose provider isn't registered → the resolve fails and + // must surface a visible inline notice (Stream), never a terminal Error. + let mut h = HarnessBuilder::new() + .calls(vec![chunks::text_turn("unused")]) + .summarization_model("gpt-4o") + .messages(five_user_messages()) + .build() + .await; + + let result = h.runner.force_compact(None).await; + assert!( + matches!(result, Ok(false)), + "missing summarization provider must yield Ok(false), got {result:?}" + ); + + let evts = loopal_test_support::events::drain_pending(&mut h.event_rx).await; + assert!( + saw_unavailable_notice(&evts), + "provider-unresolved compaction must surface an inline notice; got: {evts:?}" + ); + // Never the termination-semantic Error (would snap a viewed sub-agent to root). + assert!( + !evts + .iter() + .any(|e| matches!(e, AgentEventPayload::Error { .. })), + "must NOT emit AgentEventPayload::Error; got: {evts:?}" + ); +} + +#[tokio::test] +async fn force_compact_benign_decline_emits_no_unavailable_notice() { + // A single-turn conversation bails before any LLM resolve — no notice. + let mut h = HarnessBuilder::new() + .calls(vec![chunks::text_turn("unused")]) + .messages(vec![Message::user("only message")]) + .build() + .await; + + let _ = h.runner.force_compact(None).await; + + let evts = loopal_test_support::events::drain_pending(&mut h.event_rx).await; + assert!( + !saw_unavailable_notice(&evts), + "benign short-conversation decline must not surface an unavailable notice; got: {evts:?}" + ); +} diff --git a/crates/loopal-runtime/tests/agent_loop/context_budget_test.rs b/crates/loopal-runtime/tests/agent_loop/context_budget_test.rs index 38e46aad..9cab40cc 100644 --- a/crates/loopal-runtime/tests/agent_loop/context_budget_test.rs +++ b/crates/loopal-runtime/tests/agent_loop/context_budget_test.rs @@ -28,7 +28,7 @@ fn test_effective_context_window_cap_larger_than_model() { // cap > model_window → use model_window (min semantics) let (runner, _) = super::make_runner(); let mut mc = runner.model_config.clone(); - mc.context_tokens_cap = 500_000; + mc.context_tokens_cap = mc.max_context_tokens + 500_000; assert_eq!(mc.effective_context_window(), mc.max_context_tokens); } diff --git a/crates/loopal-runtime/tests/agent_loop/input_edge_test.rs b/crates/loopal-runtime/tests/agent_loop/input_edge_test.rs index bc93985a..69d20394 100644 --- a/crates/loopal-runtime/tests/agent_loop/input_edge_test.rs +++ b/crates/loopal-runtime/tests/agent_loop/input_edge_test.rs @@ -39,7 +39,9 @@ fn test_model_info_defaults_for_unknown_model() { let params = AgentLoopParamsBuilder::new( AgentConfig { - router: loopal_provider_api::ModelRouter::new("unknown-model-xyz".to_string()), + router: loopal_provider_api::SharedModelRouter::with_default( + "unknown-model-xyz".to_string(), + ), permission_mode: PermissionMode::AskAnyWrite, ..Default::default() }, diff --git a/crates/loopal-runtime/tests/agent_loop/input_emit_fail_edge_test.rs b/crates/loopal-runtime/tests/agent_loop/input_emit_fail_edge_test.rs index 6d85d2c9..362b4844 100644 --- a/crates/loopal-runtime/tests/agent_loop/input_emit_fail_edge_test.rs +++ b/crates/loopal-runtime/tests/agent_loop/input_emit_fail_edge_test.rs @@ -43,7 +43,7 @@ async fn test_model_switch_bails_out_when_event_emit_fails() { let (mut runner, event_rx, _mbox_tx, ctrl_tx, _perm_tx) = make_runner_with_channels(); drop(event_rx); - let original = runner.params.config.model().to_string(); + let original = runner.params.config.model(); ctrl_tx .send(ControlCommand::ModelSwitch("claude-opus-4-7".into())) .await diff --git a/crates/loopal-runtime/tests/agent_loop/mod.rs b/crates/loopal-runtime/tests/agent_loop/mod.rs index ae788458..e9643a7a 100644 --- a/crates/loopal-runtime/tests/agent_loop/mod.rs +++ b/crates/loopal-runtime/tests/agent_loop/mod.rs @@ -62,6 +62,7 @@ mod compact_force_e2e_test; mod compact_hooks_e2e_test; mod compact_instructions_e2e_test; mod compact_phases_e2e_test; +mod compact_provider_error_e2e_test; mod compact_token_sync_test; mod compaction_run_e2e_test; mod cron_e2e_test; diff --git a/crates/loopal-runtime/tests/agent_loop/model_routing_test.rs b/crates/loopal-runtime/tests/agent_loop/model_routing_test.rs index 4c83182d..4443a964 100644 --- a/crates/loopal-runtime/tests/agent_loop/model_routing_test.rs +++ b/crates/loopal-runtime/tests/agent_loop/model_routing_test.rs @@ -4,7 +4,7 @@ use std::sync::Arc; use loopal_config::Settings; use loopal_kernel::Kernel; use loopal_protocol::{AgentEvent, ControlCommand, Envelope}; -use loopal_provider_api::{ModelRouter, TaskType}; +use loopal_provider_api::{SharedModelRouter, TaskType}; use loopal_runtime::agent_loop::AgentLoopRunner; use loopal_runtime::frontend::{DenyAllHandler, UnsupportedQuestionHandler}; use loopal_runtime::{ @@ -36,7 +36,7 @@ fn make_runner_with_routing( let mut routing = HashMap::new(); routing.insert(TaskType::Summarization, summarization_model.to_string()); - let router = ModelRouter::from_parts("claude-sonnet-4-20250514".into(), routing); + let router = SharedModelRouter::from_parts("claude-sonnet-4-20250514".into(), routing); let params = AgentLoopParamsBuilder::new( AgentConfig { @@ -63,7 +63,12 @@ fn test_router_resolves_default_model() { assert_eq!(runner.params.config.model(), "claude-sonnet-4-20250514"); assert_eq!( - runner.params.config.router.resolve(TaskType::Default), + runner + .params + .config + .router + .read() + .resolve(TaskType::Default), "claude-sonnet-4-20250514" ); } @@ -73,14 +78,19 @@ fn test_router_resolves_summarization_override() { let (runner, _rx) = make_runner_with_routing("claude-haiku-3-5-20241022"); assert_eq!( - runner.params.config.router.resolve(TaskType::Summarization), + runner + .params + .config + .router + .read() + .resolve(TaskType::Summarization), "claude-haiku-3-5-20241022" ); } #[test] fn test_model_switch_preserves_summarization_override() { - let (mut runner, _rx) = make_runner_with_routing("claude-haiku-3-5-20241022"); + let (runner, _rx) = make_runner_with_routing("claude-haiku-3-5-20241022"); // Switch the default model runner @@ -93,7 +103,12 @@ fn test_model_switch_preserves_summarization_override() { assert_eq!(runner.params.config.model(), "claude-opus-4-6"); // Summarization override untouched assert_eq!( - runner.params.config.router.resolve(TaskType::Summarization), + runner + .params + .config + .router + .read() + .resolve(TaskType::Summarization), "claude-haiku-3-5-20241022" ); } @@ -118,7 +133,7 @@ fn test_model_routing_default_override_via_config_model() { // User sets model_routing.default to override the base model let mut routing = HashMap::new(); routing.insert(TaskType::Default, "claude-opus-4-6".into()); - let router = ModelRouter::from_parts("claude-sonnet-4-20250514".into(), routing); + let router = SharedModelRouter::from_parts("claude-sonnet-4-20250514".into(), routing); let params = AgentLoopParamsBuilder::new( AgentConfig { diff --git a/crates/loopal-runtime/tests/agent_loop/retry_cancel_test.rs b/crates/loopal-runtime/tests/agent_loop/retry_cancel_test.rs index 8a961fbc..316a30fd 100644 --- a/crates/loopal-runtime/tests/agent_loop/retry_cancel_test.rs +++ b/crates/loopal-runtime/tests/agent_loop/retry_cancel_test.rs @@ -92,7 +92,7 @@ async fn test_cancel_during_retry_sleep() { .params .deps .kernel - .resolve_provider(runner.params.config.model()) + .resolve_provider(&runner.params.config.model()) .unwrap(); // Signal cancel after a short delay (during retry sleep) @@ -144,7 +144,7 @@ async fn test_cancel_before_stream_chat_attempt() { .params .deps .kernel - .resolve_provider(runner.params.config.model()) + .resolve_provider(&runner.params.config.model()) .unwrap(); let stream = in_turn(runner.retry_stream_chat(¶ms, &*provider, &cancel)) diff --git a/crates/loopal-runtime/tests/agent_loop/run_test.rs b/crates/loopal-runtime/tests/agent_loop/run_test.rs index 0a8ea6f3..1eecb1b4 100644 --- a/crates/loopal-runtime/tests/agent_loop/run_test.rs +++ b/crates/loopal-runtime/tests/agent_loop/run_test.rs @@ -152,7 +152,7 @@ async fn test_ephemeral_unresolved_model_propagates_error_to_result() { let (mut runner, _event_rx) = make_multi_runner(calls); runner.params.config.lifecycle = LifecycleMode::Ephemeral; runner.params.config.router = - loopal_provider_api::ModelRouter::new("unknown-model-xyz".to_string()); + loopal_provider_api::SharedModelRouter::with_default("unknown-model-xyz".to_string()); let output = runner.run().await.unwrap(); assert_eq!(output.terminate_reason, TerminateReason::Error); diff --git a/crates/loopal-test-support/src/wiring.rs b/crates/loopal-test-support/src/wiring.rs index 048fc328..ab89868a 100644 --- a/crates/loopal-test-support/src/wiring.rs +++ b/crates/loopal-test-support/src/wiring.rs @@ -139,7 +139,7 @@ pub(crate) async fn wire(builder: HarnessBuilder) -> (SpawnedHarness, AgentLoopR if let Some(m) = builder.summarization_model { routing.insert(loopal_provider_api::TaskType::Summarization, m); } - loopal_provider_api::ModelRouter::from_parts(builder.model, routing) + loopal_provider_api::SharedModelRouter::from_parts(builder.model, routing) }, system_prompt: builder.system_prompt, mode: builder.mode,