lightonai · aussetg · May 25, 2026 · May 25, 2026 · May 26, 2026 · May 26, 2026
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/colgrep/README.md b/colgrep/README.md
@@ -219,6 +219,7 @@ colgrep --json "auth" | jq '.[] | .unit.file'
 | `colgrep clear`          | Clear index for current project        |
 | `colgrep clear --all`    | Clear all indexes                      |
 | `colgrep set-model <ID>` | Change the default ColBERT model       |
+| `colgrep warm-cache --provider migraphx` | Warm MIGraphX runtime caches |
 | `colgrep settings`       | View or modify configuration           |
 | `colgrep settings --ignore` | Add extra ignore patterns (persistent) |
 | `colgrep settings --force-include` | Force-include normally ignored paths |
@@ -250,7 +251,7 @@ colgrep settings --pool-factor 2
 # Set parallel encoding sessions (default: CPU count, max 16)
 colgrep settings --parallel 8
 
-# Set batch size per session (default: 1 for CPU, 64 for CUDA)
+# Set batch size per session (default: 1 for CPU, 64 for GPU inference providers)
 colgrep settings --batch-size 2
 
 # Set parser recursion depth guard (default: 1024)
@@ -493,6 +494,22 @@ This is useful for:
 - **CI/dev setup** scripts where you want indexing to happen ahead of time
 - **Updating** the index after pulling new code
 
+### `colgrep warm-cache`
+
+Warm provider-specific runtime caches without indexing. For MIGraphX this
+pre-compiles only eligible expensive static shapes that colgrep can later reuse
+for experimental GPU indexing. Cold or ineligible shapes continue to use CPU;
+warming can take minutes and usually only pays off for repeated large batches.
+
+```bash
+colgrep warm-cache --provider migraphx
+colgrep warm-cache --provider migraphx --batch-size 64 --max-sequence-len 1024
+```
+
+Advanced MIGraphX thresholds can be tuned with
+`NEXT_PLAID_MIGRAPHX_MIN_STATIC_SHAPE_TOKENS` and
+`NEXT_PLAID_MIGRAPHX_MIN_RUN_TOKENS`.
+
 ```bash
 # Check index status
 colgrep status
@@ -642,14 +659,29 @@ Then: `cargo install colgrep --features openblas`
 
 ### ONNX Runtime
 
-ONNX Runtime is downloaded automatically on first use. No manual installation required.
+ONNX Runtime CPU and CUDA builds are downloaded automatically on first use.
+ROCm/MIGraphX builds are ROCm-versioned and are not downloaded automatically;
+install AMD's wheel and point ColGREP at its runtime library if auto-discovery
+does not find it:
+
+```bash
+pip install onnxruntime-migraphx \
+  -f https://repo.radeon.com/rocm/manylinux/rocm-rel-<ROCM_VERSION>/
+
+export ORT_DYLIB_PATH=/path/to/site-packages/onnxruntime/capi/libonnxruntime.so
+colgrep --force-gpu search "your query"
+```
 
 Lookup order:
 
 1. `ORT_DYLIB_PATH` environment variable
-2. Python environments (pip/conda/venv)
-3. System paths
-4. Auto-download to `~/.cache/onnxruntime/`
+2. MIGraphX-capable Python/system installs (`migraphx` builds)
+3. Python environments (pip/conda/venv)
+4. System paths
+5. Auto-download to `~/.cache/colgrep/onnxruntime/`
+
+On Linux, ColGREP may re-exec itself once to add the ONNX Runtime, cuDNN, or
+ROCm library directories to `LD_LIBRARY_PATH` before ONNX Runtime is loaded.
 
 ---
 

diff --git a/colgrep/src/acceleration.rs b/colgrep/src/acceleration.rs
@@ -1,4 +1,5 @@
 use anyhow::{bail, Result};
+use next_plaid_onnx::ExecutionProvider;
 
 const FORCE_CPU_ENV_VARS: &[&str] = &["FORCE_CPU", "COLGREP_FORCE_CPU", "NEXT_PLAID_FORCE_CPU"];
 const FORCE_GPU_ENV_VARS: &[&str] = &["FORCE_GPU", "COLGREP_FORCE_GPU", "NEXT_PLAID_FORCE_GPU"];
@@ -79,6 +80,72 @@ pub fn apply_acceleration_mode(mode: AccelerationMode) {
     }
 }
 
+/// Returns whether `provider` is a GPU execution provider that colgrep can use.
+///
+/// `next-plaid-onnx` can check whether a provider is compiled into the loaded
+/// ONNX Runtime library. colgrep adds the CUDA/cuDNN readiness check because it
+/// manages CUDA library discovery itself on Linux.
+pub fn is_gpu_provider_available(provider: ExecutionProvider) -> bool {
+    if !next_plaid_onnx::is_execution_provider_available(provider) {
+        return false; // next-plaid-onnx does not report this provider as available
+    }
+
+    match provider {
+        ExecutionProvider::Cuda => {
+            #[cfg(feature = "cuda")] // CUDA additionally needs cuDNN to be available
+            {
+                crate::onnx_runtime::is_cudnn_available()
+            }
+            #[cfg(not(feature = "cuda"))] // built without the `cuda` feature: CUDA is never usable
+            {
+                false
+            }
+        }
+        provider => provider.is_gpu(), // any other provider is usable only if it is a GPU provider
+    }
+}
+
+/// Compiled GPU execution providers that pass [`is_gpu_provider_available`], in selection order.
+pub fn available_gpu_providers() -> Vec<ExecutionProvider> {
+    next_plaid_onnx::compiled_gpu_execution_providers() // the filter below keeps this list's order
+        .into_iter()
+        .filter(|provider| is_gpu_provider_available(*provider))
+        .collect()
+}
+
+/// Preferred GPU execution provider for colgrep: the first entry of [`available_gpu_providers`], if any.
+pub fn preferred_gpu_provider() -> Option<ExecutionProvider> {
+    available_gpu_providers().into_iter().next()
+}
+
+/// Whether colgrep currently has any usable GPU inference provider (see [`preferred_gpu_provider`]).
+pub fn has_gpu_provider() -> bool {
+    preferred_gpu_provider().is_some()
+}
+
+/// Returns the preferred GPU provider, or a user-facing diagnostic error if none is usable.
+pub fn require_gpu_provider() -> Result<ExecutionProvider> {
+    if let Some(provider) = preferred_gpu_provider() {
+        return Ok(provider);
+    }
+
+    let compiled = next_plaid_onnx::compiled_gpu_execution_providers(); // decides which diagnostic to show
+    if compiled.is_empty() { // no GPU provider was compiled into this binary
+        bail!(
+            "FORCE_GPU is set, but this colgrep binary was compiled without a GPU execution provider. Enable a feature such as 'cuda', 'migraphx', 'coreml', or 'directml'."
+        );
+    }
+
+    let names = compiled // providers that are compiled in but did not pass the availability checks
+        .iter()
+        .map(|provider| provider.display_name())
+        .collect::<Vec<_>>()
+        .join(", ");
+    bail!(
+        "FORCE_GPU is set, but no compiled GPU execution provider is available. Compiled provider(s): {names}. For ROCm, set ORT_DYLIB_PATH to an ONNX Runtime build with MIGraphX support; for CUDA, ensure the GPU ONNX Runtime and cuDNN are loadable."
+    )
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;

diff --git a/colgrep/src/cli.rs b/colgrep/src/cli.rs
@@ -1,6 +1,6 @@
 use std::path::PathBuf;
 
-use clap::{Parser, Subcommand};
+use clap::{Parser, Subcommand, ValueEnum};
 
 use crate::color::ColorChoice;
 
@@ -182,6 +182,21 @@ NOTES:
     • Useful for pre-warming the index before searching
     • Subsequent searches will be fast since the index is already built";
 
+pub const WARM_CACHE_HELP: &str = "\
+MIGraphX cache warming is experimental. It can take minutes and usually only
+pays off for repeated large indexing batches. ColGREP never compiles cold
+MIGraphX shapes during normal indexing; cold or ineligible shapes use CPU.
+
+EXAMPLES:
+    # Warm eligible expensive MIGraphX static-shape caches for the configured model
+    colgrep warm-cache --provider migraphx
+
+    # Warm caches for a specific model and batch size
+    colgrep warm-cache --provider migraphx --model lightonai/LateOn-Code-edge --batch-size 64
+
+    # Limit warming to shorter document shapes only
+    colgrep warm-cache --provider migraphx --max-sequence-len 1024"; // attached to `warm-cache` via `after_help`
+
 pub const CONFIG_HELP: &str = "\
 EXAMPLES:
     # Show current configuration
@@ -449,6 +464,11 @@ pub struct Cli {
     pub force_gpu: bool,
 }
 
+#[derive(Copy, Clone, Debug, Eq, PartialEq, ValueEnum)] // `ValueEnum`: parsed by clap from `--provider`
+pub enum CacheProvider {
+    Migraphx, // default for `warm-cache`; written as `migraphx` on the command line
+}
+
 #[derive(Subcommand)]
 pub enum Commands {
     /// Search for code semantically (auto-indexes if needed)
@@ -621,6 +641,26 @@ pub enum Commands {
         static_batch: bool,
     },
 
+    /// Warm provider-specific runtime caches without indexing
+    #[command(name = "warm-cache", after_help = WARM_CACHE_HELP)]
+    WarmCache {
+        /// Cache provider to warm
+        #[arg(long, value_enum, default_value_t = CacheProvider::Migraphx)]
+        provider: CacheProvider,
+
+        /// ColBERT model HuggingFace ID or local path (uses saved preference if not specified)
+        #[arg(long)]
+        model: Option<String>,
+
+        /// Model/session batch size whose MIGraphX static shapes should be warmed
+        #[arg(long = "batch-size", value_name = "SIZE")]
+        batch_size: Option<usize>,
+
+        /// Maximum sequence length of MIGraphX static shapes to warm (default: all eligible expensive shapes)
+        #[arg(long = "max-sequence-len")]
+        max_sequence_len: Option<usize>,
+    },
+
     /// View or set configuration options (default k, n values)
     #[command(name = "settings", after_help = CONFIG_HELP)]
     Settings {