diff --git a/colgrep/README.md b/colgrep/README.md index f241dcd..b9c77d7 100644 --- a/colgrep/README.md +++ b/colgrep/README.md @@ -241,9 +241,12 @@ colgrep settings --n 10 # Use INT8 quantized model (faster inference) colgrep settings --int8 -# Use FP32 full precision (more accurate) +# Force FP32 full precision (more accurate) colgrep settings --fp32 +# Reset precision to the build default (FP32 on CUDA, INT8 otherwise) +colgrep settings --default-precision + # Set embedding pool factor (2 = 50% smaller index, 1 = full precision) colgrep settings --pool-factor 2 diff --git a/colgrep/src/cli.rs b/colgrep/src/cli.rs index a6fa962..6994d7e 100644 --- a/colgrep/src/cli.rs +++ b/colgrep/src/cli.rs @@ -199,9 +199,12 @@ EXAMPLES: # Switch to INT8 quantized model (faster inference) colgrep settings --int8 - # Switch back to full-precision (FP32) model (default) + # Force the full-precision (FP32) model (model.onnx) colgrep settings --fp32 + # Reset precision to the build default (FP32 on CUDA, INT8 otherwise) + colgrep settings --default-precision + # Set embedding pool factor (smaller index, faster search) colgrep settings --pool-factor 2 @@ -252,7 +255,7 @@ NOTES: • Use 0 to reset a value to its default • These values override the CLI defaults when not explicitly specified • Default output is compact (filepath:lines). Use -v or --verbose for full content - • FP32 (full-precision) is the default + • Precision defaults to FP32 on CUDA builds and INT8 otherwise; --fp32/--int8 force a choice • Pool factor 2 (default) reduces index size by ~50%. Use 1 to disable pooling • Parallel sessions default to CPU count. Batch-size 1 (default) maximizes throughput • Parser recursion depth defaults to 1024. Increase only if needed for deep ASTs @@ -640,7 +643,7 @@ pub enum Commands { #[arg(long = "n")] default_n: Option, - /// Use full-precision (FP32) model (default) + /// Force full-precision (FP32) model (model.onnx) #[arg(long, conflicts_with = "int8")] fp32: bool, @@ -648,6 +651,10 @@ pub enum Commands { #[arg(long, conflicts_with = "fp32")] int8: bool, + /// Reset model precision to the build default (FP32 on CUDA, INT8 otherwise) + #[arg(long = "default-precision", conflicts_with_all = ["fp32", "int8"])] + default_precision: bool, + /// Set default pool factor for embedding compression (use 0 to reset to default 2) /// Higher values = faster search, fewer embeddings. Use 1 to disable pooling. #[arg(long = "pool-factor", value_name = "FACTOR")] diff --git a/colgrep/src/commands/config.rs b/colgrep/src/commands/config.rs index 6ef18f7..663e450 100644 --- a/colgrep/src/commands/config.rs +++ b/colgrep/src/commands/config.rs @@ -99,6 +99,7 @@ pub fn cmd_config( default_n: Option, fp32: bool, int8: bool, + default_precision: bool, pool_factor: Option, parallel_sessions: Option, batch_size: Option, @@ -131,6 +132,7 @@ pub fn cmd_config( && default_n.is_none() && !fp32 && !int8 + && !default_precision && pool_factor.is_none() && parallel_sessions.is_none() && batch_size.is_none() @@ -154,10 +156,11 @@ pub fn cmd_config( } // Precision - if config.use_fp32() { - println!(" precision: fp32 (default)"); + let precision = if config.use_fp32() { "fp32" } else { "int8" }; + if config.fp32.is_some() { + println!(" precision: {}", precision); } else { - println!(" precision: int8"); + println!(" precision: {} (build default)", precision); } // Pool factor @@ -291,15 +294,21 @@ pub fn cmd_config( changed = true; } - // Set fp32 or int8 + // Set fp32 or int8 (or reset to the build default) if fp32 { - config.clear_fp32(); - println!("✅ Set model precision to FP32 (full-precision, default)"); + // Persist an explicit override. Clearing would not force FP32 on non-CUDA + // builds, where a missing value resolves to INT8 (issue #130). + config.set_fp32(true); + println!("✅ Set model precision to FP32 (full-precision)"); changed = true; } else if int8 { config.set_fp32(false); println!("✅ Set model precision to INT8 (quantized)"); changed = true; + } else if default_precision { + config.clear_fp32(); + println!("✅ Reset model precision to build default (FP32 on CUDA, INT8 otherwise)"); + changed = true; } // Set or clear pool factor diff --git a/colgrep/src/config.rs b/colgrep/src/config.rs index db62ec5..4484693 100644 --- a/colgrep/src/config.rs +++ b/colgrep/src/config.rs @@ -867,4 +867,37 @@ mod tests { let deserialized: Config = serde_json::from_str(&json).unwrap(); assert!(deserialized.use_relative_paths()); } + + #[test] + fn test_set_fp32_forces_fp32_regression_130() { + // Regression for #130: `settings --fp32` must persist a value that makes + // use_fp32() resolve to true on every build. Clearing the field instead + // resolves to INT8 on non-CUDA builds, which is the bug. + let mut config = Config::default(); + + config.set_fp32(true); + assert_eq!(config.fp32, Some(true)); + assert!( + config.use_fp32(), + "set_fp32(true) must force FP32 regardless of build features" + ); + + config.set_fp32(false); + assert_eq!(config.fp32, Some(false)); + assert!(!config.use_fp32()); + + // `--default-precision` clears the override, reverting to the build default. + config.clear_fp32(); + assert_eq!(config.fp32, None); + } + + #[test] + fn test_fp32_override_survives_serialization() { + let mut config = Config::default(); + config.set_fp32(true); + let json = serde_json::to_string(&config).unwrap(); + let restored: Config = serde_json::from_str(&json).unwrap(); + assert_eq!(restored.fp32, Some(true)); + assert!(restored.use_fp32()); + } } diff --git a/colgrep/src/main.rs b/colgrep/src/main.rs index 2c3ae27..dbe3276 100644 --- a/colgrep/src/main.rs +++ b/colgrep/src/main.rs @@ -265,6 +265,7 @@ fn main() -> Result<()> { default_n, fp32, int8, + default_precision, pool_factor, parallel_sessions, batch_size, @@ -287,6 +288,7 @@ fn main() -> Result<()> { default_n, fp32, int8, + default_precision, pool_factor, parallel_sessions, batch_size,