diff --git a/colgrep/README.md b/colgrep/README.md index b9c77d7..be23d0a 100644 --- a/colgrep/README.md +++ b/colgrep/README.md @@ -247,6 +247,12 @@ colgrep settings --fp32 # Reset precision to the build default (FP32 on CUDA, INT8 otherwise) colgrep settings --default-precision +# macOS/CoreML: compiled models are cached by default under +# ~/Library/Caches/next-plaid/coreml (persists across runs = faster startup, and +# avoids the restricted-$TMPDIR failure). Override the location if you want: +colgrep settings --coreml-cache-dir ~/Library/Caches/colgrep/coreml +# (revert to the default location with: colgrep settings --clear-coreml-cache-dir) + # Set embedding pool factor (2 = 50% smaller index, 1 = full precision) colgrep settings --pool-factor 2 diff --git a/colgrep/src/cli.rs b/colgrep/src/cli.rs index 6994d7e..c7740bd 100644 --- a/colgrep/src/cli.rs +++ b/colgrep/src/cli.rs @@ -655,6 +655,16 @@ pub enum Commands { #[arg(long = "default-precision", conflicts_with_all = ["fp32", "int8"])] default_precision: bool, + /// Override the CoreML model cache directory (persists across runs). + /// CoreML already caches compiled models by default under + /// ~/Library/Caches/next-plaid/coreml; use this to choose another location (issue #129). + #[arg(long = "coreml-cache-dir", value_name = "PATH")] + coreml_cache_dir: Option<String>, + + /// Clear the CoreML cache-dir override (revert to the default cache location) + #[arg(long = "clear-coreml-cache-dir", conflicts_with = "coreml_cache_dir")] + clear_coreml_cache_dir: bool, + /// Set default pool factor for embedding compression (use 0 to reset to default 2) /// Higher values = faster search, fewer embeddings. Use 1 to disable pooling.
#[arg(long = "pool-factor", value_name = "FACTOR")] diff --git a/colgrep/src/commands/config.rs b/colgrep/src/commands/config.rs index 663e450..32f0bfe 100644 --- a/colgrep/src/commands/config.rs +++ b/colgrep/src/commands/config.rs @@ -100,6 +100,8 @@ pub fn cmd_config( fp32: bool, int8: bool, default_precision: bool, + coreml_cache_dir: Option<String>, + clear_coreml_cache_dir: bool, pool_factor: Option, parallel_sessions: Option, batch_size: Option, @@ -133,6 +135,8 @@ pub fn cmd_config( && !fp32 && !int8 && !default_precision + && coreml_cache_dir.is_none() + && !clear_coreml_cache_dir && pool_factor.is_none() && parallel_sessions.is_none() && batch_size.is_none() @@ -163,6 +167,12 @@ pub fn cmd_config( println!(" precision: {} (build default)", precision); } + // CoreML model cache directory (issue #129) + match config.coreml_cache_dir() { + Some(dir) => println!(" coreml-cache: {}", dir), + None => println!(" coreml-cache: (default: ~/Library/Caches/next-plaid/coreml)"), + } + // Pool factor let pf = config.get_pool_factor(); if config.pool_factor.is_some() { @@ -311,6 +321,20 @@ pub fn cmd_config( changed = true; } + // Set or clear the CoreML model cache directory (issue #129) + if let Some(dir) = coreml_cache_dir { + config.set_coreml_cache_dir(&dir); + println!("✅ Set CoreML model cache directory to: {}", dir); + println!(" CoreML will compile and cache models here."); + changed = true; + } else if clear_coreml_cache_dir { + config.clear_coreml_cache_dir(); + println!( + "✅ Cleared CoreML model cache directory (using default ~/Library/Caches/next-plaid/coreml)" + ); + changed = true; + } + // Set or clear pool factor if let Some(pf) = pool_factor { if pf == 0 { diff --git a/colgrep/src/config.rs b/colgrep/src/config.rs index 4484693..c258940 100644 --- a/colgrep/src/config.rs +++ b/colgrep/src/config.rs @@ -124,6 +124,13 @@ pub struct Config { #[serde(skip_serializing_if = "Option::is_none")] pub fp32: Option, + /// Stable directory for CoreML's compiled-model
cache (issue #129). When set, + /// CoreML writes its compiled model bundle here instead of the default per-user cache + /// (`~/Library/Caches/next-plaid/coreml`). Unset by default: the ONNX layer then uses + /// that per-user cache, falling back to `$TMPDIR` only if it cannot be created. + #[serde(skip_serializing_if = "Option::is_none")] + pub coreml_cache_dir: Option<String>, + /// Pool factor for embedding compression (default: 2) /// Higher values = fewer embeddings = faster search but less precision /// Set to 1 to disable pooling @@ -270,6 +277,21 @@ impl Config { self.fp32 = None; } + /// Get the configured CoreML model cache directory, if any (issue #129). + pub fn coreml_cache_dir(&self) -> Option<&str> { + self.coreml_cache_dir.as_deref() + } + + /// Set a stable CoreML model cache directory (issue #129). + pub fn set_coreml_cache_dir(&mut self, dir: impl Into<String>) { + self.coreml_cache_dir = Some(dir.into()); + } + + /// Clear the CoreML cache directory override (revert to the default per-user cache). + pub fn clear_coreml_cache_dir(&mut self) { + self.coreml_cache_dir = None; + } + /// Get the pool factor for embedding compression /// Returns the configured value or the default (2) pub fn get_pool_factor(&self) -> usize { @@ -900,4 +922,40 @@ mod tests { assert_eq!(restored.fp32, Some(true)); assert!(restored.use_fp32()); } + + #[test] + fn test_coreml_cache_dir_default_none() { + // Default: unset → uses the default per-user cache dir (issue #129). + let config = Config::default(); + assert!(config.coreml_cache_dir().is_none()); + } + + #[test] + fn test_coreml_cache_dir_set_clear() { + let mut config = Config::default(); + config.set_coreml_cache_dir("/private/tmp/colgrep-coreml"); + assert_eq!( + config.coreml_cache_dir(), + Some("/private/tmp/colgrep-coreml") + ); + config.clear_coreml_cache_dir(); + assert!(config.coreml_cache_dir().is_none()); + } + + #[test] + fn test_coreml_cache_dir_serialization() { + // Persists across runs; absent from JSON when unset (no behavior change).
+ let mut config = Config::default(); + assert!(!serde_json::to_string(&config) + .unwrap() + .contains("coreml_cache_dir")); + + config.set_coreml_cache_dir("/private/tmp/colgrep-coreml"); + let json = serde_json::to_string(&config).unwrap(); + let restored: Config = serde_json::from_str(&json).unwrap(); + assert_eq!( + restored.coreml_cache_dir(), + Some("/private/tmp/colgrep-coreml") + ); + } } diff --git a/colgrep/src/main.rs b/colgrep/src/main.rs index dbe3276..418cb41 100644 --- a/colgrep/src/main.rs +++ b/colgrep/src/main.rs @@ -14,6 +14,7 @@ use colgrep::{ acceleration::{apply_acceleration_mode, env_acceleration_mode, AccelerationMode}, install_claude_code, install_codex, install_hermes, install_opencode, setup_signal_handler, uninstall_all, uninstall_claude_code, uninstall_codex, uninstall_hermes, uninstall_opencode, + Config, }; use cli::{Cli, Commands}; @@ -22,6 +23,29 @@ use commands::{ cmd_stats, cmd_status, cmd_task_hook, cmd_update, InitOptions, }; +/// Apply the persisted CoreML model cache directory (issue #129). +/// +/// When configured via `colgrep settings --coreml-cache-dir`, export it as +/// `NEXT_PLAID_COREML_CACHE_DIR` so the ONNX layer points CoreML at the chosen +/// directory instead of its default per-user cache. An explicit environment +/// variable always wins; when neither is set (or the stored value is blank), +/// nothing is exported and the ONNX layer picks its own default cache location. +/// +/// Runs once at startup before any ONNX session is built and before worker threads +/// spawn, so the `set_var` here is safe.
+fn apply_coreml_cache_dir() { + if std::env::var_os("NEXT_PLAID_COREML_CACHE_DIR").is_some() { + return; // explicit environment override wins + } + if let Ok(config) = Config::load() { + if let Some(dir) = config.coreml_cache_dir() { + if !dir.trim().is_empty() { + std::env::set_var("NEXT_PLAID_COREML_CACHE_DIR", dir); + } + } + } +} + fn main() -> Result<()> { // Set up Ctrl+C handler for graceful interruption during indexing // This is non-fatal if it fails (e.g., in environments without signal support) @@ -44,6 +68,7 @@ fn main() -> Result<()> { env_mode }; apply_acceleration_mode(acceleration_mode); + apply_coreml_cache_dir(); // Handle global flags before subcommands if cli.install_claude_code { @@ -266,6 +291,8 @@ fn main() -> Result<()> { fp32, int8, default_precision, + coreml_cache_dir, + clear_coreml_cache_dir, pool_factor, parallel_sessions, batch_size, @@ -289,6 +316,8 @@ fn main() -> Result<()> { fp32, int8, default_precision, + coreml_cache_dir, + clear_coreml_cache_dir, pool_factor, parallel_sessions, batch_size, diff --git a/next-plaid-onnx/src/lib.rs b/next-plaid-onnx/src/lib.rs index 5c6b149..6240ac7 100644 --- a/next-plaid-onnx/src/lib.rs +++ b/next-plaid-onnx/src/lib.rs @@ -367,7 +367,7 @@ fn configure_auto_provider(builder: SessionBuilder) -> Result { if !force_cpu { if let Ok(b) = builder .clone() - .with_execution_providers([CoreMLExecutionProvider::default().build()]) + .with_execution_providers([coreml_execution_provider()]) { return Ok(b); } @@ -456,10 +456,71 @@ fn configure_tensorrt(_builder: SessionBuilder) -> Result { anyhow::bail!("TensorRT support not compiled. Enable the 'tensorrt' feature.") } +/// Read an explicit CoreML model cache directory from `NEXT_PLAID_COREML_CACHE_DIR`. +/// +/// Returns the trimmed value only when set and non-empty. This is how an explicit +/// user choice (e.g. `colgrep settings --coreml-cache-dir`) reaches CoreML.
+#[cfg(feature = "coreml")] +fn coreml_cache_dir_from_env() -> Option<String> { + std::env::var("NEXT_PLAID_COREML_CACHE_DIR") + .ok() + .map(|d| d.trim().to_string()) + .filter(|d| !d.is_empty()) +} + +/// Default per-user CoreML model cache directory: `~/Library/Caches/next-plaid/coreml` +/// (honoring `XDG_CACHE_HOME`). Used when no explicit dir is configured, so the +/// compiled model persists across runs and never compiles under `$TMPDIR` (#129). +/// Empty or relative `XDG_CACHE_HOME`/`HOME` values are ignored. Created on demand; +/// returns `None` if no usable base exists or the directory cannot be created. +#[cfg(feature = "coreml")] +fn default_coreml_cache_dir() -> Option<String> { + use std::path::PathBuf; + // Ignore empty or relative values: joining onto them would silently create the + // cache under the current working directory. + let absolute = |v: std::ffi::OsString| Some(PathBuf::from(v)).filter(|p| p.is_absolute()); + let base = std::env::var_os("XDG_CACHE_HOME") + .and_then(absolute) + .or_else(|| { + std::env::var_os("HOME") + .and_then(absolute) + .map(|home| home.join("Library/Caches")) + })?; + let dir = base.join("next-plaid").join("coreml"); + std::fs::create_dir_all(&dir).ok()?; + Some(dir.to_string_lossy().into_owned()) +} + +/// Build the CoreML execution provider with a persistent model cache directory. +/// +/// CoreML compiles the ONNX model into a CoreML bundle at session creation. With +/// no cache dir, ONNX Runtime compiles into the ephemeral process temp dir +/// (`$TMPDIR`), so the model is **recompiled on every invocation**, and on macOS +/// setups where that dir (under `/var/folders/.../T`) is rootless-restricted the +/// compile fails outright (issue #129). +/// +/// We instead point CoreML at a stable cache dir so the compiled model persists +/// across runs (much faster repeated loads) and never touches `$TMPDIR`. +/// Precedence: `NEXT_PLAID_COREML_CACHE_DIR` (e.g. `colgrep settings +/// --coreml-cache-dir`) → per-user default (`~/Library/Caches/next-plaid/coreml`). +/// If neither can be created, fall back to ORT's default (`$TMPDIR`).
+#[cfg(feature = "coreml")] +fn coreml_execution_provider() -> ort::execution_providers::ExecutionProviderDispatch { + let cache_dir = coreml_cache_dir_from_env() + .filter(|d| std::fs::create_dir_all(d).is_ok()) + .or_else(default_coreml_cache_dir); + match cache_dir { + Some(dir) => CoreMLExecutionProvider::default() + .with_model_cache_dir(dir) + .build(), + None => CoreMLExecutionProvider::default().build(), + } +} + #[cfg(feature = "coreml")] fn configure_coreml(builder: SessionBuilder) -> Result { builder - .with_execution_providers([CoreMLExecutionProvider::default().build()]) + .with_execution_providers([coreml_execution_provider()]) .map_err(|e| anyhow::anyhow!("Failed to configure CoreML execution provider: {e:?}")) }