diff --git a/colgrep/README.md b/colgrep/README.md
index b9c77d7..be23d0a 100644
--- a/colgrep/README.md
+++ b/colgrep/README.md
@@ -247,6 +247,12 @@ colgrep settings --fp32
 # Reset precision to the build default (FP32 on CUDA, INT8 otherwise)
 colgrep settings --default-precision
 
+# macOS/CoreML: compiled models are cached by default under
+# ~/Library/Caches/next-plaid/coreml. The cache persists across runs (faster startup)
+# and avoids failures when $TMPDIR is restricted. To use a different location:
+colgrep settings --coreml-cache-dir ~/Library/Caches/colgrep/coreml
+# (revert to the default location with: colgrep settings --clear-coreml-cache-dir)
+
 # Set embedding pool factor (2 = 50% smaller index, 1 = full precision)
 colgrep settings --pool-factor 2
 
diff --git a/colgrep/src/cli.rs b/colgrep/src/cli.rs
index 6994d7e..c7740bd 100644
--- a/colgrep/src/cli.rs
+++ b/colgrep/src/cli.rs
@@ -655,6 +655,16 @@ pub enum Commands {
         #[arg(long = "default-precision", conflicts_with_all = ["fp32", "int8"])]
         default_precision: bool,
 
+        /// Override the CoreML model cache directory (persists across runs).
+        /// CoreML already caches compiled models by default under
+        /// ~/Library/Caches/next-plaid/coreml; use this to choose another location (issue #129).
+        #[arg(long = "coreml-cache-dir", value_name = "PATH")]
+        coreml_cache_dir: Option<String>,
+
+        /// Clear the CoreML cache-dir override (revert to the default cache location)
+        #[arg(long = "clear-coreml-cache-dir", conflicts_with = "coreml_cache_dir")]
+        clear_coreml_cache_dir: bool,
+
         /// Set default pool factor for embedding compression (use 0 to reset to default 2)
         /// Higher values = faster search, fewer embeddings. Use 1 to disable pooling.
         #[arg(long = "pool-factor", value_name = "FACTOR")]
diff --git a/colgrep/src/commands/config.rs b/colgrep/src/commands/config.rs
index 663e450..32f0bfe 100644
--- a/colgrep/src/commands/config.rs
+++ b/colgrep/src/commands/config.rs
@@ -100,6 +100,8 @@ pub fn cmd_config(
     fp32: bool,
     int8: bool,
     default_precision: bool,
+    coreml_cache_dir: Option<String>,
+    clear_coreml_cache_dir: bool,
     pool_factor: Option<usize>,
     parallel_sessions: Option<usize>,
     batch_size: Option<usize>,
@@ -133,6 +135,8 @@ pub fn cmd_config(
         && !fp32
         && !int8
         && !default_precision
+        && coreml_cache_dir.is_none()
+        && !clear_coreml_cache_dir
         && pool_factor.is_none()
         && parallel_sessions.is_none()
         && batch_size.is_none()
@@ -163,6 +167,12 @@ pub fn cmd_config(
             println!("  precision:   {} (build default)", precision);
         }
 
+        // CoreML model cache directory (issue #129)
+        match config.coreml_cache_dir() {
+            Some(dir) => println!("  coreml-cache: {}", dir),
+            None => println!("  coreml-cache: (default: ~/Library/Caches/next-plaid/coreml)"),
+        }
+
         // Pool factor
         let pf = config.get_pool_factor();
         if config.pool_factor.is_some() {
@@ -311,6 +321,20 @@ pub fn cmd_config(
         changed = true;
     }
 
+    // Set or clear the CoreML model cache directory (issue #129)
+    if let Some(dir) = coreml_cache_dir {
+        config.set_coreml_cache_dir(&dir);
+        println!("✅ Set CoreML model cache directory to: {}", dir);
+        println!("   CoreML will compile and cache models here.");
+        changed = true;
+    } else if clear_coreml_cache_dir {
+        config.clear_coreml_cache_dir();
+        println!(
+            "✅ Cleared CoreML model cache directory (using default ~/Library/Caches/next-plaid/coreml)"
+        );
+        changed = true;
+    }
+
     // Set or clear pool factor
     if let Some(pf) = pool_factor {
         if pf == 0 {
diff --git a/colgrep/src/config.rs b/colgrep/src/config.rs
index 4484693..c258940 100644
--- a/colgrep/src/config.rs
+++ b/colgrep/src/config.rs
@@ -124,6 +124,13 @@ pub struct Config {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub fp32: Option<bool>,
 
+    /// Override for the location of CoreML's compiled-model cache (issue #129). When
+    /// set, CoreML writes its compiled model bundle here; when unset (the default),
+    /// the per-user cache dir `~/Library/Caches/next-plaid/coreml` is used. Both keep
+    /// the cache out of `$TMPDIR`, which is rootless-restricted on some macOS setups.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub coreml_cache_dir: Option<String>,
+
     /// Pool factor for embedding compression (default: 2)
     /// Higher values = fewer embeddings = faster search but less precision
     /// Set to 1 to disable pooling
@@ -270,6 +277,21 @@ impl Config {
         self.fp32 = None;
     }
 
+    /// Get the configured CoreML model cache directory, if any (issue #129).
+    pub fn coreml_cache_dir(&self) -> Option<&str> {
+        self.coreml_cache_dir.as_deref()
+    }
+
+    /// Set a stable CoreML model cache directory (issue #129).
+    pub fn set_coreml_cache_dir(&mut self, dir: impl Into<String>) {
+        self.coreml_cache_dir = Some(dir.into());
+    }
+
+    /// Clear the CoreML cache-dir override (revert to the default per-user cache dir).
+    pub fn clear_coreml_cache_dir(&mut self) {
+        self.coreml_cache_dir = None;
+    }
+
     /// Get the pool factor for embedding compression
     /// Returns the configured value or the default (2)
     pub fn get_pool_factor(&self) -> usize {
@@ -900,4 +922,40 @@ mod tests {
         assert_eq!(restored.fp32, Some(true));
         assert!(restored.use_fp32());
     }
+
+    #[test]
+    fn test_coreml_cache_dir_default_none() {
+        // Default: unset → uses the default per-user cache dir (issue #129).
+        let config = Config::default();
+        assert!(config.coreml_cache_dir().is_none());
+    }
+
+    #[test]
+    fn test_coreml_cache_dir_set_clear() {
+        let mut config = Config::default();
+        config.set_coreml_cache_dir("/private/tmp/colgrep-coreml");
+        assert_eq!(
+            config.coreml_cache_dir(),
+            Some("/private/tmp/colgrep-coreml")
+        );
+        config.clear_coreml_cache_dir();
+        assert!(config.coreml_cache_dir().is_none());
+    }
+
+    #[test]
+    fn test_coreml_cache_dir_serialization() {
+        // Persists across runs; absent from JSON when unset (no behavior change).
+        let mut config = Config::default();
+        assert!(!serde_json::to_string(&config)
+            .unwrap()
+            .contains("coreml_cache_dir"));
+
+        config.set_coreml_cache_dir("/private/tmp/colgrep-coreml");
+        let json = serde_json::to_string(&config).unwrap();
+        let restored: Config = serde_json::from_str(&json).unwrap();
+        assert_eq!(
+            restored.coreml_cache_dir(),
+            Some("/private/tmp/colgrep-coreml")
+        );
+    }
 }
diff --git a/colgrep/src/main.rs b/colgrep/src/main.rs
index dbe3276..418cb41 100644
--- a/colgrep/src/main.rs
+++ b/colgrep/src/main.rs
@@ -14,6 +14,7 @@ use colgrep::{
     acceleration::{apply_acceleration_mode, env_acceleration_mode, AccelerationMode},
     install_claude_code, install_codex, install_hermes, install_opencode, setup_signal_handler,
     uninstall_all, uninstall_claude_code, uninstall_codex, uninstall_hermes, uninstall_opencode,
+    Config,
 };
 
 use cli::{Cli, Commands};
@@ -22,6 +23,29 @@ use commands::{
     cmd_stats, cmd_status, cmd_task_hook, cmd_update, InitOptions,
 };
 
+/// Apply the persisted CoreML model cache directory (issue #129).
+///
+/// When configured via `colgrep settings --coreml-cache-dir`, export it as
+/// `NEXT_PLAID_COREML_CACHE_DIR` so the ONNX layer points CoreML at that directory
+/// instead of its default cache location. An environment variable that is already
+/// set always wins, even when empty; when neither is set, nothing is exported and
+/// the ONNX layer falls back to its per-user default cache directory.
+///
+/// Runs once at startup before any ONNX session is built and before worker threads
+/// spawn, so the `set_var` here is safe.
+fn apply_coreml_cache_dir() {
+    if std::env::var_os("NEXT_PLAID_COREML_CACHE_DIR").is_some() {
+        return; // explicit environment override wins
+    }
+    if let Ok(config) = Config::load() {
+        if let Some(dir) = config.coreml_cache_dir() {
+            if !dir.trim().is_empty() {
+                std::env::set_var("NEXT_PLAID_COREML_CACHE_DIR", dir);
+            }
+        }
+    }
+}
+
 fn main() -> Result<()> {
     // Set up Ctrl+C handler for graceful interruption during indexing
     // This is non-fatal if it fails (e.g., in environments without signal support)
@@ -44,6 +68,7 @@ fn main() -> Result<()> {
         env_mode
     };
     apply_acceleration_mode(acceleration_mode);
+    apply_coreml_cache_dir();
 
     // Handle global flags before subcommands
     if cli.install_claude_code {
@@ -266,6 +291,8 @@ fn main() -> Result<()> {
             fp32,
             int8,
             default_precision,
+            coreml_cache_dir,
+            clear_coreml_cache_dir,
             pool_factor,
             parallel_sessions,
             batch_size,
@@ -289,6 +316,8 @@ fn main() -> Result<()> {
             fp32,
             int8,
             default_precision,
+            coreml_cache_dir,
+            clear_coreml_cache_dir,
             pool_factor,
             parallel_sessions,
             batch_size,
diff --git a/next-plaid-onnx/src/lib.rs b/next-plaid-onnx/src/lib.rs
index 5c6b149..6240ac7 100644
--- a/next-plaid-onnx/src/lib.rs
+++ b/next-plaid-onnx/src/lib.rs
@@ -367,7 +367,7 @@ fn configure_auto_provider(builder: SessionBuilder) -> Result<SessionBuilder> {
     if !force_cpu {
         if let Ok(b) = builder
             .clone()
-            .with_execution_providers([CoreMLExecutionProvider::default().build()])
+            .with_execution_providers([coreml_execution_provider()])
         {
             return Ok(b);
         }
@@ -456,10 +456,63 @@ fn configure_tensorrt(_builder: SessionBuilder) -> Result<SessionBuilder> {
     anyhow::bail!("TensorRT support not compiled. Enable the 'tensorrt' feature.")
 }
 
+/// Read an explicit CoreML model cache directory from `NEXT_PLAID_COREML_CACHE_DIR`.
+///
+/// Returns the trimmed value only when set and non-empty. This is how an explicit
+/// user choice (e.g. `colgrep settings --coreml-cache-dir`) reaches CoreML.
+#[cfg(feature = "coreml")]
+fn coreml_cache_dir_from_env() -> Option<String> {
+    std::env::var("NEXT_PLAID_COREML_CACHE_DIR")
+        .ok()
+        .map(|d| d.trim().to_string())
+        .filter(|d| !d.is_empty())
+}
+
+/// Default per-user CoreML cache dir: `$XDG_CACHE_HOME/next-plaid/coreml` if that
+/// variable is set, else `$HOME/Library/Caches/next-plaid/coreml`. Used when no usable
+/// explicit dir is set, so the model persists across runs, outside `$TMPDIR` (#129).
+/// Created on demand; returns `None` if neither variable is set or creation fails.
+#[cfg(feature = "coreml")]
+fn default_coreml_cache_dir() -> Option<String> {
+    use std::path::PathBuf;
+    let base = std::env::var_os("XDG_CACHE_HOME")
+        .map(PathBuf::from)
+        .or_else(|| std::env::var_os("HOME").map(|h| PathBuf::from(h).join("Library/Caches")))?;
+    let dir = base.join("next-plaid").join("coreml");
+    std::fs::create_dir_all(&dir).ok()?;
+    Some(dir.to_string_lossy().into_owned())
+}
+
+/// Build the CoreML execution provider with a persistent model cache directory.
+///
+/// CoreML compiles the ONNX model into a CoreML bundle at session creation. With
+/// no cache dir, ONNX Runtime compiles into the ephemeral process temp dir
+/// (`$TMPDIR`), so the model is **recompiled on every invocation**, and on macOS
+/// setups where that dir (under `/var/folders/.../T`) is rootless-restricted the
+/// compile fails outright (issue #129).
+///
+/// We instead point CoreML at a stable cache dir so the compiled model persists
+/// across runs (much faster repeated loads) and never touches `$TMPDIR`.
+/// Precedence: `NEXT_PLAID_COREML_CACHE_DIR` (e.g. `colgrep settings
+/// --coreml-cache-dir`) → per-user default (`~/Library/Caches/next-plaid/coreml`).
+/// If neither can be created, fall back to ORT's default (`$TMPDIR`).
+#[cfg(feature = "coreml")]
+fn coreml_execution_provider() -> ort::execution_providers::ExecutionProviderDispatch {
+    let cache_dir = coreml_cache_dir_from_env()
+        .filter(|d| std::fs::create_dir_all(d).is_ok())
+        .or_else(default_coreml_cache_dir);
+    match cache_dir {
+        Some(dir) => CoreMLExecutionProvider::default()
+            .with_model_cache_dir(dir)
+            .build(),
+        None => CoreMLExecutionProvider::default().build(),
+    }
+}
+
 #[cfg(feature = "coreml")]
 fn configure_coreml(builder: SessionBuilder) -> Result<SessionBuilder> {
     builder
-        .with_execution_providers([CoreMLExecutionProvider::default().build()])
+        .with_execution_providers([coreml_execution_provider()])
         .map_err(|e| anyhow::anyhow!("Failed to configure CoreML execution provider: {e:?}"))
 }