Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion colgrep/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,12 @@ colgrep settings --n 10
# Use INT8 quantized model (faster inference)
colgrep settings --int8

# Use FP32 full precision (more accurate)
# Force FP32 full precision (more accurate)
colgrep settings --fp32

# Reset precision to the build default (FP32 on CUDA, INT8 otherwise)
colgrep settings --default-precision

# Set embedding pool factor (2 = 50% smaller index, 1 = full precision)
colgrep settings --pool-factor 2

Expand Down
13 changes: 10 additions & 3 deletions colgrep/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,9 +199,12 @@ EXAMPLES:
# Switch to INT8 quantized model (faster inference)
colgrep settings --int8

# Switch back to full-precision (FP32) model (default)
# Force the full-precision (FP32) model (model.onnx)
colgrep settings --fp32

# Reset precision to the build default (FP32 on CUDA, INT8 otherwise)
colgrep settings --default-precision

# Set embedding pool factor (smaller index, faster search)
colgrep settings --pool-factor 2

Expand Down Expand Up @@ -252,7 +255,7 @@ NOTES:
• Use 0 to reset a value to its default
• These values override the CLI defaults when not explicitly specified
• Default output is compact (filepath:lines). Use -v or --verbose for full content
• FP32 (full-precision) is the default
• Precision defaults to FP32 on CUDA builds and INT8 otherwise; --fp32/--int8 force a choice
• Pool factor 2 (default) reduces index size by ~50%. Use 1 to disable pooling
• Parallel sessions default to CPU count. Batch-size 1 (default) maximizes throughput
• Parser recursion depth defaults to 1024. Increase only if needed for deep ASTs
Expand Down Expand Up @@ -640,14 +643,18 @@ pub enum Commands {
#[arg(long = "n")]
default_n: Option<usize>,

/// Use full-precision (FP32) model (default)
/// Force full-precision (FP32) model (model.onnx)
#[arg(long, conflicts_with = "int8")]
fp32: bool,

/// Use INT8 quantized model (faster inference)
#[arg(long, conflicts_with = "fp32")]
int8: bool,

/// Reset model precision to the build default (FP32 on CUDA, INT8 otherwise)
#[arg(long = "default-precision", conflicts_with_all = ["fp32", "int8"])]
default_precision: bool,

/// Set default pool factor for embedding compression (use 0 to reset to default 2)
/// Higher values = faster search, fewer embeddings. Use 1 to disable pooling.
#[arg(long = "pool-factor", value_name = "FACTOR")]
Expand Down
21 changes: 15 additions & 6 deletions colgrep/src/commands/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ pub fn cmd_config(
default_n: Option<usize>,
fp32: bool,
int8: bool,
default_precision: bool,
pool_factor: Option<usize>,
parallel_sessions: Option<usize>,
batch_size: Option<usize>,
Expand Down Expand Up @@ -131,6 +132,7 @@ pub fn cmd_config(
&& default_n.is_none()
&& !fp32
&& !int8
&& !default_precision
&& pool_factor.is_none()
&& parallel_sessions.is_none()
&& batch_size.is_none()
Expand All @@ -154,10 +156,11 @@ pub fn cmd_config(
}

// Precision
if config.use_fp32() {
println!(" precision: fp32 (default)");
let precision = if config.use_fp32() { "fp32" } else { "int8" };
if config.fp32.is_some() {
println!(" precision: {}", precision);
} else {
println!(" precision: int8");
println!(" precision: {} (build default)", precision);
}

// Pool factor
Expand Down Expand Up @@ -291,15 +294,21 @@ pub fn cmd_config(
changed = true;
}

// Set fp32 or int8
// Set fp32 or int8 (or reset to the build default)
if fp32 {
config.clear_fp32();
println!("✅ Set model precision to FP32 (full-precision, default)");
// Persist an explicit override. Clearing would not force FP32 on non-CUDA
// builds, where a missing value resolves to INT8 (issue #130).
config.set_fp32(true);
println!("✅ Set model precision to FP32 (full-precision)");
changed = true;
} else if int8 {
config.set_fp32(false);
println!("✅ Set model precision to INT8 (quantized)");
changed = true;
} else if default_precision {
config.clear_fp32();
println!("✅ Reset model precision to build default (FP32 on CUDA, INT8 otherwise)");
changed = true;
}

// Set or clear pool factor
Expand Down
33 changes: 33 additions & 0 deletions colgrep/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -867,4 +867,37 @@ mod tests {
let deserialized: Config = serde_json::from_str(&json).unwrap();
assert!(deserialized.use_relative_paths());
}

#[test]
fn test_set_fp32_forces_fp32_regression_130() {
    // Regression guard for #130. `settings --fp32` has to store an explicit
    // override so that use_fp32() is true on every build; merely clearing the
    // field falls back to INT8 on non-CUDA builds, which was the reported bug.
    let mut cfg = Config::default();

    // Explicit FP32 override is stored and honoured.
    cfg.set_fp32(true);
    assert_eq!(Some(true), cfg.fp32);
    assert!(
        cfg.use_fp32(),
        "set_fp32(true) must force FP32 regardless of build features"
    );

    // Explicit INT8 override is stored and honoured.
    cfg.set_fp32(false);
    assert_eq!(Some(false), cfg.fp32);
    assert!(!cfg.use_fp32());

    // `--default-precision` drops the override so the build default applies again.
    cfg.clear_fp32();
    assert_eq!(None, cfg.fp32);
}

#[test]
fn test_fp32_override_survives_serialization() {
    // An explicit FP32 override must round-trip through JSON unchanged.
    let mut original = Config::default();
    original.set_fp32(true);

    let encoded = serde_json::to_string(&original).unwrap();
    let decoded = serde_json::from_str::<Config>(&encoded).unwrap();

    assert_eq!(Some(true), decoded.fp32);
    assert!(decoded.use_fp32());
}
}
2 changes: 2 additions & 0 deletions colgrep/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,7 @@ fn main() -> Result<()> {
default_n,
fp32,
int8,
default_precision,
pool_factor,
parallel_sessions,
batch_size,
Expand All @@ -287,6 +288,7 @@ fn main() -> Result<()> {
default_n,
fp32,
int8,
default_precision,
pool_factor,
parallel_sessions,
batch_size,
Expand Down
Loading