lightonai · raphaelsty · Jun 17, 2026 · Jun 17, 2026
diff --git a/colgrep/README.md b/colgrep/README.md
@@ -241,9 +241,12 @@ colgrep settings --n 10
 # Use INT8 quantized model (faster inference)
 colgrep settings --int8
 
-# Use FP32 full precision (more accurate)
+# Force FP32 full precision (more accurate)
 colgrep settings --fp32
 
+# Reset precision to the build default (FP32 on CUDA, INT8 otherwise)
+colgrep settings --default-precision
+
 # Set embedding pool factor (2 = 50% smaller index, 1 = full precision)
 colgrep settings --pool-factor 2
 

diff --git a/colgrep/src/cli.rs b/colgrep/src/cli.rs
@@ -199,9 +199,12 @@ EXAMPLES:
     # Switch to INT8 quantized model (faster inference)
     colgrep settings --int8
 
-    # Switch back to full-precision (FP32) model (default)
+    # Force the full-precision (FP32) model (model.onnx)
     colgrep settings --fp32
 
+    # Reset precision to the build default (FP32 on CUDA, INT8 otherwise)
+    colgrep settings --default-precision
+
     # Set embedding pool factor (smaller index, faster search)
     colgrep settings --pool-factor 2
 
@@ -252,7 +255,7 @@ NOTES:
     • Use 0 to reset a value to its default
     • These values override the CLI defaults when not explicitly specified
     • Default output is compact (filepath:lines). Use -v or --verbose for full content
-    • FP32 (full-precision) is the default
+    • Precision defaults to FP32 on CUDA builds and INT8 otherwise; --fp32/--int8 force a choice
     • Pool factor 2 (default) reduces index size by ~50%. Use 1 to disable pooling
     • Parallel sessions default to CPU count. Batch-size 1 (default) maximizes throughput
     • Parser recursion depth defaults to 1024. Increase only if needed for deep ASTs
@@ -640,14 +643,18 @@ pub enum Commands {
         #[arg(long = "n")]
         default_n: Option<usize>,
 
-        /// Use full-precision (FP32) model (default)
+        /// Force full-precision (FP32) model (model.onnx)
         #[arg(long, conflicts_with = "int8")]
         fp32: bool,
 
         /// Use INT8 quantized model (faster inference)
         #[arg(long, conflicts_with = "fp32")]
         int8: bool,
 
+        /// Reset model precision to the build default (FP32 on CUDA, INT8 otherwise)
+        #[arg(long = "default-precision", conflicts_with_all = ["fp32", "int8"])]
+        default_precision: bool,
+
         /// Set default pool factor for embedding compression (use 0 to reset to default 2)
         /// Higher values = faster search, fewer embeddings. Use 1 to disable pooling.
         #[arg(long = "pool-factor", value_name = "FACTOR")]

diff --git a/colgrep/src/commands/config.rs b/colgrep/src/commands/config.rs
@@ -99,6 +99,7 @@ pub fn cmd_config(
     default_n: Option<usize>,
     fp32: bool,
     int8: bool,
+    default_precision: bool,
     pool_factor: Option<usize>,
     parallel_sessions: Option<usize>,
     batch_size: Option<usize>,
@@ -131,6 +132,7 @@ pub fn cmd_config(
         && default_n.is_none()
         && !fp32
         && !int8
+        && !default_precision
         && pool_factor.is_none()
         && parallel_sessions.is_none()
         && batch_size.is_none()
@@ -154,10 +156,11 @@ pub fn cmd_config(
         }
 
         // Precision
-        if config.use_fp32() {
-            println!("  precision:   fp32 (default)");
+        let precision = if config.use_fp32() { "fp32" } else { "int8" };
+        if config.fp32.is_some() {
+            println!("  precision:   {}", precision);
         } else {
-            println!("  precision:   int8");
+            println!("  precision:   {} (build default)", precision);
         }
 
         // Pool factor
@@ -291,15 +294,21 @@ pub fn cmd_config(
         changed = true;
     }
 
-    // Set fp32 or int8
+    // Set fp32 or int8 (or reset to the build default)
     if fp32 {
-        config.clear_fp32();
-        println!("✅ Set model precision to FP32 (full-precision, default)");
+        // Persist an explicit override. Clearing would not force FP32 on non-CUDA
+        // builds, where a missing value resolves to INT8 (issue #130).
+        config.set_fp32(true);
+        println!("✅ Set model precision to FP32 (full-precision)");
         changed = true;
     } else if int8 {
         config.set_fp32(false);
         println!("✅ Set model precision to INT8 (quantized)");
         changed = true;
+    } else if default_precision {
+        config.clear_fp32();
+        println!("✅ Reset model precision to build default (FP32 on CUDA, INT8 otherwise)");
+        changed = true;
     }
 
     // Set or clear pool factor

diff --git a/colgrep/src/config.rs b/colgrep/src/config.rs
@@ -867,4 +867,37 @@ mod tests {
         let deserialized: Config = serde_json::from_str(&json).unwrap();
         assert!(deserialized.use_relative_paths());
     }
+
+    #[test]
+    fn test_set_fp32_forces_fp32_regression_130() {
+        // Regression for #130: `settings --fp32` must persist an explicit value so
+        // that use_fp32() resolves to true on every build. Clearing the field instead
+        // leaves it unset, which resolves to INT8 on non-CUDA builds (the bug).
+        let mut config = Config::default();
+
+        config.set_fp32(true); // what `settings --fp32` now stores
+        assert_eq!(config.fp32, Some(true));
+        assert!(
+            config.use_fp32(),
+            "set_fp32(true) must force FP32 regardless of build features"
+        );
+
+        config.set_fp32(false); // what `settings --int8` stores
+        assert_eq!(config.fp32, Some(false));
+        assert!(!config.use_fp32());
+
+        // `--default-precision` clears the override, reverting to the build default.
+        config.clear_fp32();
+        assert_eq!(config.fp32, None); // only the stored field is checked; the resolved value is build-dependent
+    }
+
+    #[test]
+    fn test_fp32_override_survives_serialization() {
+        let mut config = Config::default(); // JSON round-trip must keep the FP32 override
+        config.set_fp32(true); // explicit override, as stored by `settings --fp32`
+        let json = serde_json::to_string(&config).unwrap(); // serialize to JSON
+        let restored: Config = serde_json::from_str(&json).unwrap(); // and read it back
+        assert_eq!(restored.fp32, Some(true)); // the stored override is preserved
+        assert!(restored.use_fp32()); // and it still resolves to FP32
+    }
 }
diff --git a/colgrep/src/main.rs b/colgrep/src/main.rs
@@ -265,6 +265,7 @@ fn main() -> Result<()> {
             default_n,
             fp32,
             int8,
+            default_precision,
             pool_factor,
             parallel_sessions,
             batch_size,
@@ -287,6 +288,7 @@ fn main() -> Result<()> {
             default_n,
             fp32,
             int8,
+            default_precision,
             pool_factor,
             parallel_sessions,
             batch_size,