diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..80df3bc3 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,110 @@ +# ferrolearn CI +# +# Gates every push to `main` and every PR targeting `main` on four +# parallel jobs: +# +# - `cargo fmt --check` — strict rustfmt parity with main +# - `cargo clippy -D warnings` — strict lints; the workspace is +# expected to clear with -D warnings +# (math idioms that want range loops +# carry per-function #[allow]) +# - `cargo test --release` — workspace lib + integration tests +# (`--lib --tests`, no doctests); release +# is faster for the math-heavy paths +# - `cargo doc --no-deps` — catches doc build errors before they +# ship to docs.rs (`-D warnings` for +# rustdoc is not enabled yet) +# +# Jobs are independent so a failure in one (e.g. fmt drift) doesn't gate +# the others — you see all the breakages in one CI run instead of +# discovering them serially. +# +# Concurrency: a new push to the same ref (branch / PR) cancels any +# in-flight run. Prevents redundant work when contributors push a fix +# while CI is still running on the prior commit. +# +# Permissions: read-only on `contents` — the workflow doesn't push, doesn't +# comment, doesn't deploy. Smallest blast radius. +# +# Caching: Swatinem/rust-cache@v2 keys on Cargo.lock + the runner image. +# First run on a PR pays the cold-compile cost; subsequent pushes to the +# same branch reuse the target/ + ~/.cargo state. +# +# Stable toolchain only. MSRV (`rust-version = "1.85"` in workspace +# Cargo.toml) is not enforced here — add a separate matrix job if MSRV +# regressions become a problem in practice. 
+ +name: CI + +on: + push: + branches: [main] + pull_request: + branches: [main] + +concurrency: + group: ci-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +env: + CARGO_TERM_COLOR: always + RUST_BACKTRACE: 1 + +jobs: + fmt: + name: cargo fmt --check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install Rust stable + rustfmt + run: rustup toolchain install stable --component rustfmt --profile minimal --no-self-update + - run: cargo fmt --check + + clippy: + name: cargo clippy -D warnings + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install Rust stable + clippy + run: rustup toolchain install stable --component clippy --profile minimal --no-self-update + - name: Cache cargo + uses: Swatinem/rust-cache@v2 + with: + shared-key: clippy + - run: cargo clippy --workspace --all-targets -- -D warnings + + test: + name: cargo test --release + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install Rust stable + run: rustup toolchain install stable --profile minimal --no-self-update + - name: Cache cargo + uses: Swatinem/rust-cache@v2 + with: + shared-key: test + # `--release` is faster than dev for the linear-algebra-heavy paths + # and matches the perf characteristics users will hit in practice. + - run: cargo test --workspace --release --lib --tests + + doc: + name: cargo doc --no-deps + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install Rust stable + run: rustup toolchain install stable --profile minimal --no-self-update + - name: Cache cargo + uses: Swatinem/rust-cache@v2 + with: + shared-key: doc + # Builds docs without strict warnings — the workspace currently has + # several broken intra-doc links (`FerroError::*` variants, etc.) + # that should be cleaned up as a follow-up PR. Tighten to + # `RUSTDOCFLAGS: -D warnings` once those land. 
For now, this catches + # syntax-level doc errors that would break the docs.rs build. + - run: cargo doc --workspace --no-deps --document-private-items diff --git a/ferrolearn-bayes/src/categorical.rs b/ferrolearn-bayes/src/categorical.rs index 29106c2c..9bed180b 100644 --- a/ferrolearn-bayes/src/categorical.rs +++ b/ferrolearn-bayes/src/categorical.rs @@ -305,8 +305,7 @@ impl Fit, Array1> for Categor } // Per-class, per-category count table. - let mut counts_for_feature: Vec> = - vec![vec![0usize; cats.len()]; n_classes]; + let mut counts_for_feature: Vec> = vec![vec![0usize; cats.len()]; n_classes]; for (ci, indices) in class_indices.iter().enumerate() { for &sample_idx in indices { let val = x[[sample_idx, j]].to_usize().unwrap_or(0); @@ -321,8 +320,7 @@ impl Fit, Array1> for Categor } // Cached log probabilities derived from counts. - let feature_log_prob = - recompute_feature_log_prob(&category_counts, &class_counts, alpha); + let feature_log_prob = recompute_feature_log_prob(&category_counts, &class_counts, alpha); // Resolve priors. let class_log_prior = @@ -345,6 +343,7 @@ impl Fit, Array1> for Categor /// Recompute `feature_log_prob[j][c][k]` from raw `category_counts` and /// `class_counts`, using Laplace smoothing. +#[allow(clippy::needless_range_loop)] // matrix-style triple indexing reads cleaner than nested .iter().enumerate() fn recompute_feature_log_prob( category_counts: &[Vec>], class_counts: &[usize], @@ -374,6 +373,7 @@ fn recompute_feature_log_prob( /// Resolve `class_log_prior` from `class_counts`, honoring an optional /// explicit `class_prior` and the `fit_prior` flag. 
+#[allow(clippy::needless_range_loop)] // index-by-class is the natural loop here fn resolve_class_log_prior( class_counts: &[usize], n_classes: usize, diff --git a/ferrolearn-bayes/src/lib.rs b/ferrolearn-bayes/src/lib.rs index 94989a06..b011dc74 100644 --- a/ferrolearn-bayes/src/lib.rs +++ b/ferrolearn-bayes/src/lib.rs @@ -91,10 +91,7 @@ pub(crate) fn log_softmax_rows(jll: &Array2) -> Array2 { let n_classes = jll.ncols(); let mut log_proba = Array2::::zeros((n_samples, n_classes)); for i in 0..n_samples { - let max_score = jll - .row(i) - .iter() - .fold(F::neg_infinity(), |a, &b| a.max(b)); + let max_score = jll.row(i).iter().fold(F::neg_infinity(), |a, &b| a.max(b)); let mut sum_exp = F::zero(); for ci in 0..n_classes { sum_exp = sum_exp + (jll[[i, ci]] - max_score).exp(); diff --git a/ferrolearn-bayes/tests/api_proof.rs b/ferrolearn-bayes/tests/api_proof.rs index fdb06585..ac0c597e 100644 --- a/ferrolearn-bayes/tests/api_proof.rs +++ b/ferrolearn-bayes/tests/api_proof.rs @@ -52,7 +52,12 @@ fn assert_log_proba_consistent(log_proba: &Array2, proba: &Array2) { let p = proba[[i, ci]]; // Avoid log(0) at boundaries. 
if p > 1e-100 { - assert_relative_eq!(log_proba[[i, ci]], p.ln(), epsilon = 1e-9, max_relative = 1e-9); + assert_relative_eq!( + log_proba[[i, ci]], + p.ln(), + epsilon = 1e-9, + max_relative = 1e-9 + ); } } } @@ -347,16 +352,38 @@ fn api_proof_conjugate_normal_normal() { // ============================================================================= #[test] fn api_proof_f32_compiles() { - let x32 = Array2::from_shape_vec((4, 2), vec![1.0f32, 2.0, 1.0, 2.5, 5.0, 6.0, 5.5, 6.0]).unwrap(); + let x32 = + Array2::from_shape_vec((4, 2), vec![1.0f32, 2.0, 1.0, 2.5, 5.0, 6.0, 5.5, 6.0]).unwrap(); let y = array![0usize, 0, 1, 1]; - let _ = GaussianNB::::new().fit(&x32, &y).unwrap().predict(&x32).unwrap(); - let _ = MultinomialNB::::new().fit(&x32, &y).unwrap().predict(&x32).unwrap(); - let _ = BernoulliNB::::new().fit(&x32, &y).unwrap().predict(&x32).unwrap(); - let _ = ComplementNB::::new().fit(&x32, &y).unwrap().predict(&x32).unwrap(); - - let x_cat = Array2::from_shape_vec((4, 2), vec![0.0f32, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0]).unwrap(); - let _ = CategoricalNB::::new().fit(&x_cat, &y).unwrap().predict(&x_cat).unwrap(); + let _ = GaussianNB::::new() + .fit(&x32, &y) + .unwrap() + .predict(&x32) + .unwrap(); + let _ = MultinomialNB::::new() + .fit(&x32, &y) + .unwrap() + .predict(&x32) + .unwrap(); + let _ = BernoulliNB::::new() + .fit(&x32, &y) + .unwrap() + .predict(&x32) + .unwrap(); + let _ = ComplementNB::::new() + .fit(&x32, &y) + .unwrap() + .predict(&x32) + .unwrap(); + + let x_cat = + Array2::from_shape_vec((4, 2), vec![0.0f32, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 1.0]).unwrap(); + let _ = CategoricalNB::::new() + .fit(&x_cat, &y) + .unwrap() + .predict(&x_cat) + .unwrap(); } // ============================================================================= diff --git a/ferrolearn-bayes/tests/conformance_surface_coverage.rs b/ferrolearn-bayes/tests/conformance_surface_coverage.rs index 158f873d..41757362 100644 --- 
a/ferrolearn-bayes/tests/conformance_surface_coverage.rs +++ b/ferrolearn-bayes/tests/conformance_surface_coverage.rs @@ -5,7 +5,7 @@ //! exclusion OR mentioned by leaf name in at least one test source file. //! Modeled on ferrolearn-linear/tests/conformance_surface_coverage.rs. -use ferrolearn_test_oracle::{assert_surface_covered, SurfaceExclusions, SurfaceInventory}; +use ferrolearn_test_oracle::{SurfaceExclusions, SurfaceInventory, assert_surface_covered}; use std::path::{Path, PathBuf}; fn crate_root() -> PathBuf { @@ -18,8 +18,12 @@ fn test_dir() -> PathBuf { #[test] fn surface_coverage_gate() { - let inv_path = test_dir().join("conformance").join("_surface_inventory.toml"); - let exc_path = test_dir().join("conformance").join("_surface_exclusions.toml"); + let inv_path = test_dir() + .join("conformance") + .join("_surface_inventory.toml"); + let exc_path = test_dir() + .join("conformance") + .join("_surface_exclusions.toml"); let inventory = SurfaceInventory::load(&inv_path); let exclusions = SurfaceExclusions::load(&exc_path); if inventory.items.is_empty() { @@ -39,10 +43,7 @@ fn surface_coverage_gate() { } v }; - let paths: Vec<&Path> = candidate_test_files - .iter() - .map(PathBuf::as_path) - .collect(); + let paths: Vec<&Path> = candidate_test_files.iter().map(PathBuf::as_path).collect(); assert!( !paths.is_empty(), "no .rs test files found under {}", diff --git a/ferrolearn-bayes/tests/conformance_wave4.rs b/ferrolearn-bayes/tests/conformance_wave4.rs index fae01db7..a8b379a3 100644 --- a/ferrolearn-bayes/tests/conformance_wave4.rs +++ b/ferrolearn-bayes/tests/conformance_wave4.rs @@ -27,10 +27,11 @@ fn conformance_categorical_nb() { .iter() .map(|v| v.as_u64().unwrap() as usize) .collect(); - let matches = preds.iter().zip(expected.iter()).filter(|(a, e)| a == e).count(); + let matches = preds + .iter() + .zip(expected.iter()) + .filter(|(a, e)| a == e) + .count(); let acc = matches as f64 / preds.len() as f64; - assert!( - acc >= 0.95, - "CategoricalNB 
accuracy {acc:.4} < 0.95 floor" - ); + assert!(acc >= 0.95, "CategoricalNB accuracy {acc:.4} < 0.95 floor"); } diff --git a/ferrolearn-bench/benches/kernel_methods.rs b/ferrolearn-bench/benches/kernel_methods.rs index 401bdc4f..c8ff2039 100644 --- a/ferrolearn-bench/benches/kernel_methods.rs +++ b/ferrolearn-bench/benches/kernel_methods.rs @@ -1,11 +1,10 @@ //! Kernel methods benchmarks: KernelRidge, GaussianProcess, Nystroem, RBFSampler. use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; -use ferrolearn_bench::{regression_data}; +use ferrolearn_bench::regression_data; use ferrolearn_core::{Fit, Predict, Transform}; use ferrolearn_kernel::{ - GaussianProcessRegressor, KernelRidge, Nystroem, RBFSampler, - gp_kernels::RBFKernel, + GaussianProcessRegressor, KernelRidge, Nystroem, RBFSampler, gp_kernels::RBFKernel, }; const KERNEL_SIZES: &[(&str, usize, usize)] = &[ diff --git a/ferrolearn-bench/src/bin/harness.rs b/ferrolearn-bench/src/bin/harness.rs index 4148deb1..c1830d73 100644 --- a/ferrolearn-bench/src/bin/harness.rs +++ b/ferrolearn-bench/src/bin/harness.rs @@ -42,14 +42,13 @@ use ferrolearn_decomp::{ }; use ferrolearn_kernel::{KernelRidge, Nystroem, RBFSampler}; use ferrolearn_linear::{ - ARDRegression, BayesianRidge, ElasticNet, HuberRegressor, Lasso, LinearRegression, - LinearSVC, LogisticRegression, QDA, QuantileRegressor, Ridge, RidgeClassifier, + ARDRegression, BayesianRidge, ElasticNet, HuberRegressor, Lasso, LinearRegression, LinearSVC, + LogisticRegression, QDA, QuantileRegressor, Ridge, RidgeClassifier, }; use ferrolearn_neighbors::{KNeighborsClassifier, KNeighborsRegressor, NearestCentroid}; use ferrolearn_preprocess::{ BinEncoding, BinStrategy, KBinsDiscretizer, MaxAbsScaler, MinMaxScaler, Normalizer, - PolynomialFeatures, PowerTransformer, RobustScaler, StandardScaler, - normalizer::NormType, + PolynomialFeatures, PowerTransformer, RobustScaler, StandardScaler, normalizer::NormType, }; use ferrolearn_tree::{ 
AdaBoostClassifier, BaggingClassifier, DecisionTreeClassifier, DecisionTreeRegressor, @@ -167,45 +166,191 @@ fn bench_regressors(records: &mut Vec) { let (x, y) = regression_data(n, p); let (xtr, xte, ytr, yte) = split_regression(&x, &y); - reg_bench!(records, "LinearRegression", label, n, p, - &xtr, &xte, &ytr, &yte, LinearRegression::::new()); - reg_bench!(records, "Ridge", label, n, p, - &xtr, &xte, &ytr, &yte, Ridge::::new()); - reg_bench!(records, "Lasso", label, n, p, - &xtr, &xte, &ytr, &yte, Lasso::::new()); - reg_bench!(records, "ElasticNet", label, n, p, - &xtr, &xte, &ytr, &yte, ElasticNet::::new()); - reg_bench!(records, "BayesianRidge", label, n, p, - &xtr, &xte, &ytr, &yte, BayesianRidge::::new()); - reg_bench!(records, "ARDRegression", label, n, p, - &xtr, &xte, &ytr, &yte, ARDRegression::::new()); + reg_bench!( + records, + "LinearRegression", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + LinearRegression::::new() + ); + reg_bench!( + records, + "Ridge", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + Ridge::::new() + ); + reg_bench!( + records, + "Lasso", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + Lasso::::new() + ); + reg_bench!( + records, + "ElasticNet", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + ElasticNet::::new() + ); + reg_bench!( + records, + "BayesianRidge", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + BayesianRidge::::new() + ); + reg_bench!( + records, + "ARDRegression", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + ARDRegression::::new() + ); if n <= 1_000 { - reg_bench!(records, "HuberRegressor", label, n, p, - &xtr, &xte, &ytr, &yte, HuberRegressor::::new()); - reg_bench!(records, "QuantileRegressor", label, n, p, - &xtr, &xte, &ytr, &yte, QuantileRegressor::::new()); + reg_bench!( + records, + "HuberRegressor", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + HuberRegressor::::new() + ); + reg_bench!( + records, + "QuantileRegressor", + label, + n, + p, + &xtr, + &xte, + &ytr, + 
&yte, + QuantileRegressor::::new() + ); } - reg_bench!(records, "DecisionTreeRegressor", label, n, p, - &xtr, &xte, &ytr, &yte, DecisionTreeRegressor::::new()); - reg_bench!(records, "RandomForestRegressor", label, n, p, - &xtr, &xte, &ytr, &yte, - RandomForestRegressor::::new().with_random_state(42)); - reg_bench!(records, "ExtraTreesRegressor", label, n, p, - &xtr, &xte, &ytr, &yte, - ExtraTreesRegressor::::new().with_random_state(42)); + reg_bench!( + records, + "DecisionTreeRegressor", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + DecisionTreeRegressor::::new() + ); + reg_bench!( + records, + "RandomForestRegressor", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + RandomForestRegressor::::new().with_random_state(42) + ); + reg_bench!( + records, + "ExtraTreesRegressor", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + ExtraTreesRegressor::::new().with_random_state(42) + ); if n <= 1_000 { - reg_bench!(records, "GradientBoostingRegressor", label, n, p, - &xtr, &xte, &ytr, &yte, - GradientBoostingRegressor::::new().with_random_state(42)); + reg_bench!( + records, + "GradientBoostingRegressor", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + GradientBoostingRegressor::::new().with_random_state(42) + ); } - reg_bench!(records, "HistGradientBoostingRegressor", label, n, p, - &xtr, &xte, &ytr, &yte, - HistGradientBoostingRegressor::::new().with_random_state(42)); - reg_bench!(records, "KNeighborsRegressor", label, n, p, - &xtr, &xte, &ytr, &yte, KNeighborsRegressor::::new()); + reg_bench!( + records, + "HistGradientBoostingRegressor", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + HistGradientBoostingRegressor::::new().with_random_state(42) + ); + reg_bench!( + records, + "KNeighborsRegressor", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + KNeighborsRegressor::::new() + ); if n <= 2_000 { - reg_bench!(records, "KernelRidge", label, n, p, - &xtr, &xte, &ytr, &yte, KernelRidge::::new()); + reg_bench!( + records, + "KernelRidge", + 
label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + KernelRidge::::new() + ); } } } @@ -219,69 +364,262 @@ fn bench_classifiers(records: &mut Vec) { let (x, y) = classification_data(n, p); let (xtr, xte, ytr, yte) = split_classification(&x, &y); - cls_bench!(records, "LogisticRegression", label, n, p, - &xtr, &xte, &ytr, &yte, LogisticRegression::::new()); - cls_bench!(records, "RidgeClassifier", label, n, p, - &xtr, &xte, &ytr, &yte, RidgeClassifier::::new()); - cls_bench!(records, "LinearSVC", label, n, p, - &xtr, &xte, &ytr, &yte, LinearSVC::::new()); - cls_bench!(records, "QDA", label, n, p, - &xtr, &xte, &ytr, &yte, QDA::::new()); - cls_bench!(records, "GaussianNB", label, n, p, - &xtr, &xte, &ytr, &yte, GaussianNB::::new()); - cls_bench!(records, "DecisionTreeClassifier", label, n, p, - &xtr, &xte, &ytr, &yte, DecisionTreeClassifier::::new()); - cls_bench!(records, "ExtraTreeClassifier", label, n, p, - &xtr, &xte, &ytr, &yte, ExtraTreeClassifier::::new()); - cls_bench!(records, "RandomForestClassifier", label, n, p, - &xtr, &xte, &ytr, &yte, - RandomForestClassifier::::new().with_random_state(42)); - cls_bench!(records, "ExtraTreesClassifier", label, n, p, - &xtr, &xte, &ytr, &yte, - ExtraTreesClassifier::::new().with_random_state(42)); + cls_bench!( + records, + "LogisticRegression", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + LogisticRegression::::new() + ); + cls_bench!( + records, + "RidgeClassifier", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + RidgeClassifier::::new() + ); + cls_bench!( + records, + "LinearSVC", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + LinearSVC::::new() + ); + cls_bench!( + records, + "QDA", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + QDA::::new() + ); + cls_bench!( + records, + "GaussianNB", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + GaussianNB::::new() + ); + cls_bench!( + records, + "DecisionTreeClassifier", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + 
DecisionTreeClassifier::::new() + ); + cls_bench!( + records, + "ExtraTreeClassifier", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + ExtraTreeClassifier::::new() + ); + cls_bench!( + records, + "RandomForestClassifier", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + RandomForestClassifier::::new().with_random_state(42) + ); + cls_bench!( + records, + "ExtraTreesClassifier", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + ExtraTreesClassifier::::new().with_random_state(42) + ); if n <= 1_000 { - cls_bench!(records, "AdaBoostClassifier", label, n, p, - &xtr, &xte, &ytr, &yte, - AdaBoostClassifier::::new().with_random_state(42)); - cls_bench!(records, "BaggingClassifier", label, n, p, - &xtr, &xte, &ytr, &yte, - BaggingClassifier::::new().with_random_state(42)); - cls_bench!(records, "GradientBoostingClassifier", label, n, p, - &xtr, &xte, &ytr, &yte, - GradientBoostingClassifier::::new().with_random_state(42)); + cls_bench!( + records, + "AdaBoostClassifier", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + AdaBoostClassifier::::new().with_random_state(42) + ); + cls_bench!( + records, + "BaggingClassifier", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + BaggingClassifier::::new().with_random_state(42) + ); + cls_bench!( + records, + "GradientBoostingClassifier", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + GradientBoostingClassifier::::new().with_random_state(42) + ); } - cls_bench!(records, "HistGradientBoostingClassifier", label, n, p, - &xtr, &xte, &ytr, &yte, - HistGradientBoostingClassifier::::new().with_random_state(42)); - cls_bench!(records, "KNeighborsClassifier", label, n, p, - &xtr, &xte, &ytr, &yte, KNeighborsClassifier::::new()); - cls_bench!(records, "NearestCentroid", label, n, p, - &xtr, &xte, &ytr, &yte, NearestCentroid::::new()); + cls_bench!( + records, + "HistGradientBoostingClassifier", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + HistGradientBoostingClassifier::::new().with_random_state(42) + ); + 
cls_bench!( + records, + "KNeighborsClassifier", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + KNeighborsClassifier::::new() + ); + cls_bench!( + records, + "NearestCentroid", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + NearestCentroid::::new() + ); // Non-negative-feature NB variants. let xtr_pos = xtr.mapv(f64::abs); let xte_pos = xte.mapv(f64::abs); - cls_bench!(records, "MultinomialNB", label, n, p, - &xtr_pos, &xte_pos, &ytr, &yte, MultinomialNB::::new()); - cls_bench!(records, "ComplementNB", label, n, p, - &xtr_pos, &xte_pos, &ytr, &yte, ComplementNB::::new()); + cls_bench!( + records, + "MultinomialNB", + label, + n, + p, + &xtr_pos, + &xte_pos, + &ytr, + &yte, + MultinomialNB::::new() + ); + cls_bench!( + records, + "ComplementNB", + label, + n, + p, + &xtr_pos, + &xte_pos, + &ytr, + &yte, + ComplementNB::::new() + ); // Bernoulli NB needs binary features. let xtr_bin = xtr.mapv(|v| if v > 0.0 { 1.0 } else { 0.0 }); let xte_bin = xte.mapv(|v| if v > 0.0 { 1.0 } else { 0.0 }); - cls_bench!(records, "BernoulliNB", label, n, p, - &xtr_bin, &xte_bin, &ytr, &yte, BernoulliNB::::new()); + cls_bench!( + records, + "BernoulliNB", + label, + n, + p, + &xtr_bin, + &xte_bin, + &ytr, + &yte, + BernoulliNB::::new() + ); } // Multi-class addition (one-vs-rest path). 
let (label, n, p) = ("multiclass_2Kx20", 2_000, 20); let (x, y) = multiclass_data(n, p, 5); let (xtr, xte, ytr, yte) = split_classification(&x, &y); - cls_bench!(records, "LogisticRegression(5class)", label, n, p, - &xtr, &xte, &ytr, &yte, LogisticRegression::::new()); - cls_bench!(records, "RandomForestClassifier(5class)", label, n, p, - &xtr, &xte, &ytr, &yte, - RandomForestClassifier::::new().with_random_state(42)); + cls_bench!( + records, + "LogisticRegression(5class)", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + LogisticRegression::::new() + ); + cls_bench!( + records, + "RandomForestClassifier(5class)", + label, + n, + p, + &xtr, + &xte, + &ytr, + &yte, + RandomForestClassifier::::new().with_random_state(42) + ); } // --------------------------------------------------------------------------- @@ -410,8 +748,7 @@ fn bench_clusterers(records: &mut Vec) { // O(n²) algorithms — restrict to small sizes. if n <= 1_000 { { - let mut rec = - BenchRecord::new("cluster", "AgglomerativeClustering", label, n, p); + let mut rec = BenchRecord::new("cluster", "AgglomerativeClustering", label, n, p); rec.fit_us = slow_once(|| { let _ = AgglomerativeClustering::::new(8).fit(&x, &()).unwrap(); }); diff --git a/ferrolearn-cluster/src/gmm.rs b/ferrolearn-cluster/src/gmm.rs index f48a2c5d..9a4b7f04 100644 --- a/ferrolearn-cluster/src/gmm.rs +++ b/ferrolearn-cluster/src/gmm.rs @@ -528,6 +528,7 @@ fn log_sum_exp_rows(log_resp: &Array2) -> (Array2, Array1) { /// inferior local minima — measured at -0.27 ARI vs sklearn at n=200 with /// uniform-random init, and ~0.16 ARI gap remaining at n=5000 with /// single-trial KMeans++. 
+#[allow(clippy::needless_range_loop)] // index-keyed access into `min_sq_dist` is clearer than enumerate-rebind fn init_means(x: &Array2, k: usize, rng: &mut StdRng) -> Array2 { let n_samples = x.nrows(); let n_features = x.ncols(); diff --git a/ferrolearn-cluster/src/kmeans.rs b/ferrolearn-cluster/src/kmeans.rs index 1ba2910d..56fd41ab 100644 --- a/ferrolearn-cluster/src/kmeans.rs +++ b/ferrolearn-cluster/src/kmeans.rs @@ -188,10 +188,7 @@ fn kmeans_plus_plus(x: &Array2, k: usize, rng: &mut StdRng) -> Arra let center0 = centers.row(0); let center0_slice = center0.as_slice().unwrap_or(&[]); for i in 0..n_samples { - min_dists[i] = squared_euclidean( - x.row(i).as_slice().unwrap_or(&[]), - center0_slice, - ); + min_dists[i] = squared_euclidean(x.row(i).as_slice().unwrap_or(&[]), center0_slice); } } @@ -210,8 +207,7 @@ fn kmeans_plus_plus(x: &Array2, k: usize, rng: &mut StdRng) -> Arra let mut best_new_dists: Option> = None; for _ in 0..n_trials { - let threshold: F = - F::from(rng.random::()).unwrap_or_else(F::zero) * total; + let threshold: F = F::from(rng.random::()).unwrap_or_else(F::zero) * total; let mut cumsum = F::zero(); let mut candidate = n_samples - 1; for i in 0..n_samples { @@ -226,10 +222,7 @@ fn kmeans_plus_plus(x: &Array2, k: usize, rng: &mut StdRng) -> Arra let mut new_dists = min_dists.clone(); let mut potential = F::zero(); for i in 0..n_samples { - let d = squared_euclidean( - x.row(i).as_slice().unwrap_or(&[]), - &cand_slice, - ); + let d = squared_euclidean(x.row(i).as_slice().unwrap_or(&[]), &cand_slice); if d < new_dists[i] { new_dists[i] = d; } diff --git a/ferrolearn-cluster/src/mini_batch_kmeans.rs b/ferrolearn-cluster/src/mini_batch_kmeans.rs index 9a025823..f261f689 100644 --- a/ferrolearn-cluster/src/mini_batch_kmeans.rs +++ b/ferrolearn-cluster/src/mini_batch_kmeans.rs @@ -249,10 +249,7 @@ fn kmeans_plus_plus_mb(x: &Array2, k: usize, rng: &mut StdRng) -> A let center0 = centers.row(0); let center0_slice = 
center0.as_slice().unwrap_or(&[]); for i in 0..n_samples { - min_dists[i] = squared_euclidean_mb( - x.row(i).as_slice().unwrap_or(&[]), - center0_slice, - ); + min_dists[i] = squared_euclidean_mb(x.row(i).as_slice().unwrap_or(&[]), center0_slice); } } @@ -276,8 +273,7 @@ fn kmeans_plus_plus_mb(x: &Array2, k: usize, rng: &mut StdRng) -> A let mut best_new_dists: Option> = None; for _ in 0..n_trials { - let threshold: F = - F::from(rng.random::()).unwrap_or_else(F::zero) * total; + let threshold: F = F::from(rng.random::()).unwrap_or_else(F::zero) * total; let mut cumsum = F::zero(); let mut candidate = n_samples - 1; for i in 0..n_samples { @@ -294,10 +290,7 @@ fn kmeans_plus_plus_mb(x: &Array2, k: usize, rng: &mut StdRng) -> A let mut new_dists = min_dists.clone(); let mut potential = F::zero(); for i in 0..n_samples { - let d = squared_euclidean_mb( - x.row(i).as_slice().unwrap_or(&[]), - &cand_slice, - ); + let d = squared_euclidean_mb(x.row(i).as_slice().unwrap_or(&[]), &cand_slice); if d < new_dists[i] { new_dists[i] = d; } diff --git a/ferrolearn-cluster/tests/api_proof.rs b/ferrolearn-cluster/tests/api_proof.rs index 2463afd0..a207349a 100644 --- a/ferrolearn-cluster/tests/api_proof.rs +++ b/ferrolearn-cluster/tests/api_proof.rs @@ -19,15 +19,14 @@ //! 
- All public enum variants use approx::assert_relative_eq; -use ferrolearn_core::traits::{Fit, Predict, Transform}; use ferrolearn_cluster::{ AffinityPropagation, AgglomerativeClustering, AgglomerativeLinkage, BayesianCovType, BayesianGaussianMixture, Birch, BisectingKMeans, BisectingStrategy, CovarianceType, DBSCAN, FeatureAgglomeration, GaussianMixture, Hdbscan, KMeans, LabelPropagation, LabelPropagationKernel, LabelSpreading, LabelSpreadingKernel, Linkage, MeanShift, - MiniBatchKMeans, MiniBatchKMeansInit, OPTICS, PoolingFunc, SpectralClustering, - WeightPriorType, + MiniBatchKMeans, MiniBatchKMeansInit, OPTICS, PoolingFunc, SpectralClustering, WeightPriorType, }; +use ferrolearn_core::traits::{Fit, Predict, Transform}; use ndarray::{Array1, Array2, array}; /// Two well-separated clusters in 2D for unsupervised tests. @@ -169,7 +168,12 @@ fn api_proof_optics() { #[test] fn api_proof_agglomerative_clustering() { let x = two_blobs(); - for linkage in [Linkage::Ward, Linkage::Complete, Linkage::Average, Linkage::Single] { + for linkage in [ + Linkage::Ward, + Linkage::Complete, + Linkage::Average, + Linkage::Single, + ] { let m = AgglomerativeClustering::::new(2).with_linkage(linkage); let f = m.fit(&x, &()).unwrap(); let _ = f.labels(); @@ -302,7 +306,10 @@ fn api_proof_bayesian_gaussian_mixture() { BayesianCovType::Diag, BayesianCovType::Spherical, ] { - for wpt in [WeightPriorType::DirichletProcess, WeightPriorType::DirichletDistribution] { + for wpt in [ + WeightPriorType::DirichletProcess, + WeightPriorType::DirichletDistribution, + ] { let m = BayesianGaussianMixture::::new(3) .with_covariance_type(cov) .with_weight_prior_type(wpt) @@ -343,7 +350,9 @@ fn api_proof_bayesian_gaussian_mixture() { fn semi_supervised_data() -> (Array2, Array1) { let x = Array2::from_shape_vec( (8, 2), - vec![0.0, 0.0, 0.1, 0.0, 0.0, 0.1, 0.1, 0.1, 10.0, 10.0, 10.1, 10.0, 10.0, 10.1, 10.1, 10.1], + vec![ + 0.0, 0.0, 0.1, 0.0, 0.0, 0.1, 0.1, 0.1, 10.0, 10.0, 10.1, 10.0, 10.0, 10.1, 
10.1, 10.1, + ], ) .unwrap(); // First and fifth labeled; others unlabeled (-1). diff --git a/ferrolearn-cluster/tests/conformance_sklearn.rs b/ferrolearn-cluster/tests/conformance_sklearn.rs index 48df075b..d32d8d2b 100644 --- a/ferrolearn-cluster/tests/conformance_sklearn.rs +++ b/ferrolearn-cluster/tests/conformance_sklearn.rs @@ -18,8 +18,8 @@ use ferrolearn_core::{Fit, Predict}; use ferrolearn_test_oracle::{ - assert_ari_ge, assert_close, assert_close_slice, json_to_array2, json_to_labels, - load_fixture, MIN_CLUSTER_ARI, TOL_CLUSTER_CENTER_ABS, TOL_CLUSTER_CENTER_REL, + MIN_CLUSTER_ARI, TOL_CLUSTER_CENTER_ABS, TOL_CLUSTER_CENTER_REL, assert_ari_ge, assert_close, + assert_close_slice, json_to_array2, json_to_labels, load_fixture, }; use ndarray::{Array2, ArrayView1}; @@ -51,7 +51,14 @@ fn closest_row_index(target: ArrayView1, centers: &Array2) -> usize { /// Compare two sets of cluster centers up to row permutation: for each /// expected row, find the closest actual row and assert element-wise /// agreement. Both sides must have the same number of rows. -fn assert_centers_match(actual: &Array2, expected: &Array2, rel: f64, abs: f64, label: &str) { +#[allow(clippy::needless_range_loop)] // skip-if-used index walk reads cleaner than filter/enumerate +fn assert_centers_match( + actual: &Array2, + expected: &Array2, + rel: f64, + abs: f64, + label: &str, +) { assert_eq!( actual.shape(), expected.shape(), @@ -113,7 +120,12 @@ fn conformance_kmeans() { let expected_labels = json_to_labels(&fx.expected["labels"]); let actual_labels: Vec = fitted.labels().iter().map(|&v| v as i64).collect(); - assert_ari_ge(&actual_labels, &expected_labels, MIN_CLUSTER_ARI, "KMeans.labels"); + assert_ari_ge( + &actual_labels, + &expected_labels, + MIN_CLUSTER_ARI, + "KMeans.labels", + ); let expected_centers = json_to_array2(&fx.expected["cluster_centers"]); assert_centers_match( @@ -154,7 +166,12 @@ fn conformance_dbscan() { // raw labels is meaningful. 
(sklearn also encodes noise as -1.) let expected_labels = json_to_labels(&fx.expected["labels"]); let actual_labels: Vec = fitted.labels().iter().map(|&v| v as i64).collect(); - assert_ari_ge(&actual_labels, &expected_labels, MIN_CLUSTER_ARI, "DBSCAN.labels"); + assert_ari_ge( + &actual_labels, + &expected_labels, + MIN_CLUSTER_ARI, + "DBSCAN.labels", + ); // Order-invariant scalars: should match exactly. let expected_n_clusters = fx.expected["n_clusters"].as_u64().unwrap() as usize; @@ -167,10 +184,7 @@ fn conformance_dbscan() { let actual_n_noise = actual_labels.iter().filter(|&&v| v == -1).count(); assert_eq!(actual_n_noise, expected_n_noise, "DBSCAN.n_noise"); - let expected_core_count = fx.expected["core_sample_indices"] - .as_array() - .unwrap() - .len(); + let expected_core_count = fx.expected["core_sample_indices"].as_array().unwrap().len(); assert_eq!( fitted.core_sample_indices().len(), expected_core_count, @@ -197,8 +211,8 @@ fn conformance_agglomerative_clustering() { other => panic!("unsupported linkage in fixture: {other}"), }; - let model = ferrolearn_cluster::AgglomerativeClustering::::new(n_clusters) - .with_linkage(linkage); + let model = + ferrolearn_cluster::AgglomerativeClustering::::new(n_clusters).with_linkage(linkage); let fitted = model.fit(&x, &()).expect("AgglomerativeClustering fit"); assert_eq!( @@ -379,7 +393,10 @@ fn conformance_birch() { // shape and may legitimately differ from sklearn's. Just sanity-check // it's non-empty and has the right feature dimension. 
let sub = fitted.subcluster_centers(); - assert!(sub.nrows() > 0, "Birch.subcluster_centers must be non-empty"); + assert!( + sub.nrows() > 0, + "Birch.subcluster_centers must be non-empty" + ); assert_eq!( sub.ncols(), x.ncols(), @@ -388,7 +405,12 @@ fn conformance_birch() { let expected_labels = json_to_labels(&fx.expected["labels"]); let actual_labels: Vec = fitted.labels().iter().map(|&v| v as i64).collect(); - assert_ari_ge(&actual_labels, &expected_labels, MIN_CLUSTER_ARI, "Birch.labels"); + assert_ari_ge( + &actual_labels, + &expected_labels, + MIN_CLUSTER_ARI, + "Birch.labels", + ); } // --------------------------------------------------------------------------- @@ -482,8 +504,8 @@ fn conformance_spectral_clustering() { "SpectralClustering fixture uses unsupported affinity '{affinity}' (only 'rbf' is implemented)" ); - let model = - ferrolearn_cluster::SpectralClustering::::new(n_clusters).with_random_state(random_state); + let model = ferrolearn_cluster::SpectralClustering::::new(n_clusters) + .with_random_state(random_state); let fitted = model.fit(&x, &()).expect("SpectralClustering fit"); let labels = fitted.labels(); diff --git a/ferrolearn-cluster/tests/conformance_surface_coverage.rs b/ferrolearn-cluster/tests/conformance_surface_coverage.rs index c5f27304..b5bc24a6 100644 --- a/ferrolearn-cluster/tests/conformance_surface_coverage.rs +++ b/ferrolearn-cluster/tests/conformance_surface_coverage.rs @@ -5,7 +5,7 @@ //! exclusion OR mentioned by leaf name in at least one test source file. //! Modeled on ferrolearn-linear/tests/conformance_surface_coverage.rs. 
-use ferrolearn_test_oracle::{assert_surface_covered, SurfaceExclusions, SurfaceInventory}; +use ferrolearn_test_oracle::{SurfaceExclusions, SurfaceInventory, assert_surface_covered}; use std::path::{Path, PathBuf}; fn crate_root() -> PathBuf { @@ -18,8 +18,12 @@ fn test_dir() -> PathBuf { #[test] fn surface_coverage_gate() { - let inv_path = test_dir().join("conformance").join("_surface_inventory.toml"); - let exc_path = test_dir().join("conformance").join("_surface_exclusions.toml"); + let inv_path = test_dir() + .join("conformance") + .join("_surface_inventory.toml"); + let exc_path = test_dir() + .join("conformance") + .join("_surface_exclusions.toml"); let inventory = SurfaceInventory::load(&inv_path); let exclusions = SurfaceExclusions::load(&exc_path); if inventory.items.is_empty() { @@ -39,10 +43,7 @@ fn surface_coverage_gate() { } v }; - let paths: Vec<&Path> = candidate_test_files - .iter() - .map(PathBuf::as_path) - .collect(); + let paths: Vec<&Path> = candidate_test_files.iter().map(PathBuf::as_path).collect(); assert!( !paths.is_empty(), "no .rs test files found under {}", diff --git a/ferrolearn-cluster/tests/conformance_wave4.rs b/ferrolearn-cluster/tests/conformance_wave4.rs index 746d8661..fed7b464 100644 --- a/ferrolearn-cluster/tests/conformance_wave4.rs +++ b/ferrolearn-cluster/tests/conformance_wave4.rs @@ -43,12 +43,7 @@ fn conformance_bayesian_gaussian_mixture() { let expected = json_to_labels(&fx.expected["labels"]); // Variational EM converges to different posteriors than sklearn under // different init RNG paths; ARI floor 0.40 accepts the divergence. 
- assert_ari_ge( - &actual, - &expected, - 0.40, - "BayesianGaussianMixture.labels", - ); + assert_ari_ge(&actual, &expected, 0.40, "BayesianGaussianMixture.labels"); } #[test] @@ -76,7 +71,9 @@ fn conformance_feature_agglomeration() { let model = FeatureAgglomeration::::new(n_clusters); let fitted = model.fit(&x, &()).expect("FeatureAgglomeration fit"); - let xt = fitted.transform(&x).expect("FeatureAgglomeration transform"); + let xt = fitted + .transform(&x) + .expect("FeatureAgglomeration transform"); assert_eq!(xt.nrows(), x.nrows(), "FA transformed rows"); assert_eq!(xt.ncols(), n_clusters, "FA transformed cols"); for v in xt.iter() { @@ -91,9 +88,7 @@ fn conformance_hdbscan() { let min_cluster_size = fx.params["min_cluster_size"].as_u64().unwrap_or(10) as usize; let model = Hdbscan::::new().with_min_cluster_size(min_cluster_size); - let labels = model - .fit_predict(&x) - .expect("Hdbscan fit_predict"); + let labels = model.fit_predict(&x).expect("Hdbscan fit_predict"); let actual: Vec = labels.iter().map(|&v| v as i64).collect(); let expected = json_to_labels(&fx.expected["labels"]); // HDBSCAN clustering uses different mutual-reachability minimum-spanning-tree @@ -126,7 +121,11 @@ fn conformance_label_propagation() { .collect(); let actual: Vec = preds.iter().map(|&v| v as i64).collect(); // Different gamma normalization paths can flip ~10% of labels. 
- let matches = actual.iter().zip(expected.iter()).filter(|(a, e)| a == e).count(); + let matches = actual + .iter() + .zip(expected.iter()) + .filter(|(a, e)| a == e) + .count(); let acc = matches as f64 / actual.len() as f64; assert!( acc >= 0.80, @@ -160,7 +159,11 @@ fn conformance_label_spreading() { .map(|v| v.as_i64().unwrap()) .collect(); let actual: Vec = preds.iter().map(|&v| v as i64).collect(); - let matches = actual.iter().zip(expected.iter()).filter(|(a, e)| a == e).count(); + let matches = actual + .iter() + .zip(expected.iter()) + .filter(|(a, e)| a == e) + .count(); let acc = matches as f64 / actual.len() as f64; assert!( acc >= 0.80, diff --git a/ferrolearn-covariance/src/covariance.rs b/ferrolearn-covariance/src/covariance.rs index 650a9613..432a6cee 100644 --- a/ferrolearn-covariance/src/covariance.rs +++ b/ferrolearn-covariance/src/covariance.rs @@ -1245,8 +1245,7 @@ impl Fit, ()> for MinCovDet { false } else { let ratio = min_d / max_d; - let ratio_f: f64 = - num_traits::ToPrimitive::to_f64(&ratio).unwrap_or(0.0); + let ratio_f: f64 = num_traits::ToPrimitive::to_f64(&ratio).unwrap_or(0.0); ratio_f > 1e-3 } } @@ -1272,8 +1271,7 @@ impl Fit, ()> for MinCovDet { for i in 0..p { for j in 0..p { if i == j { - shrunk[[i, j]] = - (F::one() - alpha) * shrunk[[i, j]] + alpha * diag_target; + shrunk[[i, j]] = (F::one() - alpha) * shrunk[[i, j]] + alpha * diag_target; } else { shrunk[[i, j]] = (F::one() - alpha) * shrunk[[i, j]]; } diff --git a/ferrolearn-covariance/tests/conformance_sklearn.rs b/ferrolearn-covariance/tests/conformance_sklearn.rs index 2053e252..9d3bf386 100644 --- a/ferrolearn-covariance/tests/conformance_sklearn.rs +++ b/ferrolearn-covariance/tests/conformance_sklearn.rs @@ -14,12 +14,10 @@ //! documented run-to-run variance. Fixture tolerance widened accordingly. 
use ferrolearn_core::Fit; -use ferrolearn_covariance::{ - EmpiricalCovariance, LedoitWolf, MinCovDet, OAS, ShrunkCovariance, -}; +use ferrolearn_covariance::{EmpiricalCovariance, LedoitWolf, MinCovDet, OAS, ShrunkCovariance}; use ferrolearn_test_oracle::{ - assert_close, assert_close_slice, json_to_array1, json_to_array2, load_fixture, - TOL_COVARIANCE_ABS, TOL_COVARIANCE_REL, + TOL_COVARIANCE_ABS, TOL_COVARIANCE_REL, assert_close, assert_close_slice, json_to_array1, + json_to_array2, load_fixture, }; // --------------------------------------------------------------------------- @@ -134,7 +132,9 @@ fn conformance_ledoit_wolf() { let x = json_to_array2(&fx.input["X"]); let (rel, abs) = fx.tolerance(TOL_COVARIANCE_REL, TOL_COVARIANCE_ABS); - let fitted = LedoitWolf::::new().fit(&x, &()).expect("LedoitWolf fit"); + let fitted = LedoitWolf::::new() + .fit(&x, &()) + .expect("LedoitWolf fit"); let expected_loc = json_to_array1(&fx.expected["location"]); let expected_cov = json_to_array2(&fx.expected["covariance"]); diff --git a/ferrolearn-covariance/tests/conformance_surface_coverage.rs b/ferrolearn-covariance/tests/conformance_surface_coverage.rs index c1895be0..f0645d4f 100644 --- a/ferrolearn-covariance/tests/conformance_surface_coverage.rs +++ b/ferrolearn-covariance/tests/conformance_surface_coverage.rs @@ -5,7 +5,7 @@ //! exclusion OR mentioned by leaf name in at least one test source file. //! Modeled on ferrolearn-linear/tests/conformance_surface_coverage.rs. 
-use ferrolearn_test_oracle::{assert_surface_covered, SurfaceExclusions, SurfaceInventory}; +use ferrolearn_test_oracle::{SurfaceExclusions, SurfaceInventory, assert_surface_covered}; use std::path::{Path, PathBuf}; fn crate_root() -> PathBuf { @@ -18,8 +18,12 @@ fn test_dir() -> PathBuf { #[test] fn surface_coverage_gate() { - let inv_path = test_dir().join("conformance").join("_surface_inventory.toml"); - let exc_path = test_dir().join("conformance").join("_surface_exclusions.toml"); + let inv_path = test_dir() + .join("conformance") + .join("_surface_inventory.toml"); + let exc_path = test_dir() + .join("conformance") + .join("_surface_exclusions.toml"); let inventory = SurfaceInventory::load(&inv_path); let exclusions = SurfaceExclusions::load(&exc_path); if inventory.items.is_empty() { @@ -39,10 +43,7 @@ fn surface_coverage_gate() { } v }; - let paths: Vec<&Path> = candidate_test_files - .iter() - .map(PathBuf::as_path) - .collect(); + let paths: Vec<&Path> = candidate_test_files.iter().map(PathBuf::as_path).collect(); assert!( !paths.is_empty(), "no .rs test files found under {}", diff --git a/ferrolearn-covariance/tests/conformance_wave4.rs b/ferrolearn-covariance/tests/conformance_wave4.rs index bb5faf89..21828f8c 100644 --- a/ferrolearn-covariance/tests/conformance_wave4.rs +++ b/ferrolearn-covariance/tests/conformance_wave4.rs @@ -2,9 +2,7 @@ use ferrolearn_core::{Fit, Predict}; use ferrolearn_covariance::{EllipticEnvelope, GraphicalLasso}; -use ferrolearn_test_oracle::{ - assert_close_slice, json_to_array1, json_to_array2, load_fixture, -}; +use ferrolearn_test_oracle::{assert_close_slice, json_to_array1, json_to_array2, load_fixture}; #[test] fn conformance_graphical_lasso_location() { @@ -62,7 +60,11 @@ fn conformance_elliptic_envelope() { .iter() .map(|v| v.as_i64().unwrap()) .collect(); - let matches = preds.iter().zip(expected.iter()).filter(|&(&a, &e)| a as i64 == e).count(); + let matches = preds + .iter() + .zip(expected.iter()) + .filter(|&(&a, 
&e)| a as i64 == e) + .count(); let frac = matches as f64 / preds.len() as f64; // EllipticEnvelope depends on FastMCD which has subset variance (see // documented divergence `fastmcd-subset-selection-variance` and diff --git a/ferrolearn-decomp/src/minibatch_nmf.rs b/ferrolearn-decomp/src/minibatch_nmf.rs index 25b14c1f..bd42682d 100644 --- a/ferrolearn-decomp/src/minibatch_nmf.rs +++ b/ferrolearn-decomp/src/minibatch_nmf.rs @@ -278,7 +278,11 @@ fn init_nndsvd_simple( // Clamp negatives to zero and store as row of H. for j in 0..n_features { let val = v[[j, 0]]; - h[[k, j]] = if val > F::zero() { val } else { eps::() * scale }; + h[[k, j]] = if val > F::zero() { + val + } else { + eps::() * scale + }; } } @@ -308,11 +312,7 @@ fn reconstruction_error(x: &Array2, w: &Array2, h: &Ar /// Solve for W_batch via coordinate descent on `||X_batch - W_batch @ H||^2`, /// keeping H fixed. All values in W_batch are clamped non-negative. -fn update_w_batch( - x_batch: &Array2, - w_batch: &mut Array2, - h: &Array2, -) { +fn update_w_batch(x_batch: &Array2, w_batch: &mut Array2, h: &Array2) { let n_batch = x_batch.nrows(); let n_components = h.nrows(); let n_features = h.ncols(); @@ -426,8 +426,7 @@ impl Fit, ()> for MiniBatchNMF { let mut batch_start = 0; while batch_start < n_samples { let batch_end = (batch_start + batch_size).min(n_samples); - let batch_indices: Vec = - indices[batch_start..batch_end].to_vec(); + let batch_indices: Vec = indices[batch_start..batch_end].to_vec(); let actual_batch = batch_indices.len(); // Extract X_batch. 
diff --git a/ferrolearn-decomp/tests/api_proof.rs b/ferrolearn-decomp/tests/api_proof.rs index c2015c78..5832ddbe 100644 --- a/ferrolearn-decomp/tests/api_proof.rs +++ b/ferrolearn-decomp/tests/api_proof.rs @@ -56,7 +56,10 @@ fn api_proof_pca() { #[test] fn api_proof_incremental_pca() { let x = small_2d_data(); - let f = IncrementalPCA::::new(2).with_batch_size(4).fit(&x, &()).unwrap(); + let f = IncrementalPCA::::new(2) + .with_batch_size(4) + .fit(&x, &()) + .unwrap(); let z = f.transform(&x).unwrap(); assert_eq!(z.dim(), (12, 2)); let recon = f.inverse_transform(&z).unwrap(); @@ -72,7 +75,12 @@ fn api_proof_incremental_pca() { #[test] fn api_proof_kernel_pca() { let x = small_2d_data(); - for kernel in [Kernel::Linear, Kernel::RBF, Kernel::Polynomial, Kernel::Sigmoid] { + for kernel in [ + Kernel::Linear, + Kernel::RBF, + Kernel::Polynomial, + Kernel::Sigmoid, + ] { let f = KernelPCA::::new(2) .with_kernel(kernel) .with_gamma(1.0) @@ -105,7 +113,10 @@ fn api_proof_sparse_pca() { #[test] fn api_proof_truncated_svd() { let x = small_2d_data(); - let f = TruncatedSVD::::new(2).with_random_state(0).fit(&x, &()).unwrap(); + let f = TruncatedSVD::::new(2) + .with_random_state(0) + .fit(&x, &()) + .unwrap(); let z = f.transform(&x).unwrap(); assert_eq!(z.dim(), (12, 2)); let recon = f.inverse_transform(&z).unwrap(); @@ -122,7 +133,10 @@ fn api_proof_truncated_svd() { #[test] fn api_proof_nmf() { let x = count_data(); - for solver in [NMFSolver::CoordinateDescent, NMFSolver::MultiplicativeUpdate] { + for solver in [ + NMFSolver::CoordinateDescent, + NMFSolver::MultiplicativeUpdate, + ] { for init in [NMFInit::Random, NMFInit::Nndsvd] { let f = NMF::::new(2) .with_max_iter(100) diff --git a/ferrolearn-decomp/tests/conformance_sklearn.rs b/ferrolearn-decomp/tests/conformance_sklearn.rs index f90813b0..264b6e36 100644 --- a/ferrolearn-decomp/tests/conformance_sklearn.rs +++ b/ferrolearn-decomp/tests/conformance_sklearn.rs @@ -19,9 +19,9 @@ use ferrolearn_core::{Fit, 
Transform}; use ferrolearn_test_oracle::{ - assert_close, assert_close_rows_sign_ambiguous, assert_close_slice, - assert_close_slice_sign_ambiguous, json_to_array1, json_to_array2, load_fixture, - TOL_DECOMP_ABS, TOL_DECOMP_REL, + TOL_DECOMP_ABS, TOL_DECOMP_REL, assert_close, assert_close_rows_sign_ambiguous, + assert_close_slice, assert_close_slice_sign_ambiguous, json_to_array1, json_to_array2, + load_fixture, }; // --------------------------------------------------------------------------- diff --git a/ferrolearn-decomp/tests/conformance_surface_coverage.rs b/ferrolearn-decomp/tests/conformance_surface_coverage.rs index fae4fd53..3d248518 100644 --- a/ferrolearn-decomp/tests/conformance_surface_coverage.rs +++ b/ferrolearn-decomp/tests/conformance_surface_coverage.rs @@ -5,7 +5,7 @@ //! exclusion OR mentioned by leaf name in at least one test source file. //! Modeled on ferrolearn-linear/tests/conformance_surface_coverage.rs. -use ferrolearn_test_oracle::{assert_surface_covered, SurfaceExclusions, SurfaceInventory}; +use ferrolearn_test_oracle::{SurfaceExclusions, SurfaceInventory, assert_surface_covered}; use std::path::{Path, PathBuf}; fn crate_root() -> PathBuf { @@ -18,8 +18,12 @@ fn test_dir() -> PathBuf { #[test] fn surface_coverage_gate() { - let inv_path = test_dir().join("conformance").join("_surface_inventory.toml"); - let exc_path = test_dir().join("conformance").join("_surface_exclusions.toml"); + let inv_path = test_dir() + .join("conformance") + .join("_surface_inventory.toml"); + let exc_path = test_dir() + .join("conformance") + .join("_surface_exclusions.toml"); let inventory = SurfaceInventory::load(&inv_path); let exclusions = SurfaceExclusions::load(&exc_path); if inventory.items.is_empty() { @@ -39,10 +43,7 @@ fn surface_coverage_gate() { } v }; - let paths: Vec<&Path> = candidate_test_files - .iter() - .map(PathBuf::as_path) - .collect(); + let paths: Vec<&Path> = candidate_test_files.iter().map(PathBuf::as_path).collect(); assert!( 
!paths.is_empty(), "no .rs test files found under {}", diff --git a/ferrolearn-decomp/tests/conformance_wave2.rs b/ferrolearn-decomp/tests/conformance_wave2.rs index 84eee4a7..6a0102fa 100644 --- a/ferrolearn-decomp/tests/conformance_wave2.rs +++ b/ferrolearn-decomp/tests/conformance_wave2.rs @@ -10,14 +10,14 @@ use ferrolearn_core::{Fit, Transform}; use ferrolearn_decomp::{ - cross_decomposition::{PLSCanonical, PLSRegression, CCA}, - DictionaryLearning, FactorAnalysis, FastICA, IncrementalPCA, Isomap, Kernel, KernelPCA, - LatentDirichletAllocation, LdaLearningMethod, MiniBatchNMF, MDS, LLE, SparsePCA, - SpectralEmbedding, TruncatedSVD, Tsne, + DictionaryLearning, FactorAnalysis, FastICA, IncrementalPCA, Isomap, Kernel, KernelPCA, LLE, + LatentDirichletAllocation, LdaLearningMethod, MDS, MiniBatchNMF, SparsePCA, SpectralEmbedding, + TruncatedSVD, Tsne, + cross_decomposition::{CCA, PLSCanonical, PLSRegression}, }; use ferrolearn_test_oracle::{ - assert_close, assert_close_slice, json_to_array1, json_to_array2, load_fixture, - TOL_DECOMP_ABS, TOL_DECOMP_REL, + TOL_DECOMP_ABS, TOL_DECOMP_REL, assert_close, assert_close_slice, json_to_array1, + json_to_array2, load_fixture, }; fn finite_and_shaped(arr: &ndarray::Array2, n_rows: usize, n_cols: usize, label: &str) { @@ -132,11 +132,7 @@ fn conformance_kernel_pca() { expected_eigvals.len(), "KernelPCA.eigenvalues length" ); - for (i, (&a, &e)) in actual - .iter() - .zip(expected_eigvals.iter()) - .enumerate() - { + for (i, (&a, &e)) in actual.iter().zip(expected_eigvals.iter()).enumerate() { assert_close(a, e, 1e-4, 1e-6, &format!("KernelPCA.eigenvalues[{i}]")); } } @@ -214,7 +210,13 @@ fn conformance_incremental_pca() { let actual = fitted.explained_variance(); assert_eq!(actual.len(), expected_ev.len(), "IPCA.expl_var length"); for (i, (&a, &e)) in actual.iter().zip(expected_ev.iter()).enumerate() { - assert_close(a, e, 5e-2, 1e-4, &format!("IPCA.explained_variance[{i}] (blocked by #342)")); + assert_close( + a, + e, + 
5e-2, + 1e-4, + &format!("IPCA.explained_variance[{i}] (blocked by #342)"), + ); } } @@ -285,10 +287,7 @@ fn conformance_mini_batch_nmf() { // W and components_ should be non-negative. assert!(w.iter().all(|&v| v >= 0.0), "MiniBatchNMF W has negatives"); let h = fitted.components(); - assert!( - h.iter().all(|&v| v >= 0.0), - "MiniBatchNMF H has negatives" - ); + assert!(h.iter().all(|&v| v >= 0.0), "MiniBatchNMF H has negatives"); // Reconstruction error within an order of magnitude of sklearn's. let expected_err = fx.expected["reconstruction_error"].as_f64().unwrap(); @@ -454,7 +453,12 @@ fn conformance_spectral_embedding() { let fitted = SpectralEmbedding::new(n_components) .fit(&x, &()) .expect("SpectralEmbedding fit"); - finite_and_shaped(fitted.embedding(), x.nrows(), n_components, "SpectralEmbedding"); + finite_and_shaped( + fitted.embedding(), + x.nrows(), + n_components, + "SpectralEmbedding", + ); } #[test] diff --git a/ferrolearn-kernel/tests/api_proof.rs b/ferrolearn-kernel/tests/api_proof.rs index 972491ff..ce31136e 100644 --- a/ferrolearn-kernel/tests/api_proof.rs +++ b/ferrolearn-kernel/tests/api_proof.rs @@ -22,12 +22,12 @@ use ferrolearn_core::traits::{Fit, Predict, Transform}; use ferrolearn_kernel::bandwidth::BandwidthStrategy; use ferrolearn_kernel::{ - BiweightKernel, ConstantKernel, CosineKernel, CvStrategy, DotProductKernel, - EpanechnikovKernel, GaussianKernel, GaussianProcessClassifier, GaussianProcessRegressor, - HeteroscedasticityTest, KernelRidge, KernelType, LocalPolynomialRegression, MaternKernel, - NadarayaWatson, Nystroem, ProductKernel, RBFKernel, RBFSampler, SumKernel, TricubeKernel, - TriweightKernel, UniformKernel, WhiteKernel, heteroscedasticity_test, residual_diagnostics, - scott_bandwidth, silverman_bandwidth, + BiweightKernel, ConstantKernel, CosineKernel, CvStrategy, DotProductKernel, EpanechnikovKernel, + GaussianKernel, GaussianProcessClassifier, GaussianProcessRegressor, HeteroscedasticityTest, + KernelRidge, KernelType, 
LocalPolynomialRegression, MaternKernel, NadarayaWatson, Nystroem, + ProductKernel, RBFKernel, RBFSampler, SumKernel, TricubeKernel, TriweightKernel, UniformKernel, + WhiteKernel, heteroscedasticity_test, residual_diagnostics, scott_bandwidth, + silverman_bandwidth, }; use ndarray::{Array1, Array2, array}; @@ -211,7 +211,10 @@ fn api_proof_gp_kernel_zoo() { let dp = DotProductKernel::new(1.0); let _ = dp.compute(&x, &x); - let s = SumKernel::new(Box::new(RBFKernel::new(1.0)), Box::new(WhiteKernel::new(0.1))); + let s = SumKernel::new( + Box::new(RBFKernel::new(1.0)), + Box::new(WhiteKernel::new(0.1)), + ); let _ = s.compute(&x, &x); let p = ProductKernel::new( diff --git a/ferrolearn-kernel/tests/conformance_sklearn.rs b/ferrolearn-kernel/tests/conformance_sklearn.rs index fc256b30..e6cd545c 100644 --- a/ferrolearn-kernel/tests/conformance_sklearn.rs +++ b/ferrolearn-kernel/tests/conformance_sklearn.rs @@ -6,8 +6,8 @@ use ferrolearn_core::{Fit, Predict}; use ferrolearn_kernel::{KernelRidge, KernelType}; use ferrolearn_test_oracle::{ - assert_close_slice, json_to_array1, json_to_array2, load_fixture, TOL_KERNEL_ABS, - TOL_KERNEL_REL, + TOL_KERNEL_ABS, TOL_KERNEL_REL, assert_close_slice, json_to_array1, json_to_array2, + load_fixture, }; #[test] @@ -24,7 +24,9 @@ fn conformance_kernel_ridge_linear() { "polynomial" => KernelType::Polynomial, other => panic!("unsupported kernel: {other}"), }; - let model = KernelRidge::::new().with_alpha(alpha).with_kernel(kernel); + let model = KernelRidge::::new() + .with_alpha(alpha) + .with_kernel(kernel); let fitted = model.fit(&x, &y).expect("KernelRidge fit"); let preds = fitted.predict(&x).expect("KernelRidge predict"); diff --git a/ferrolearn-kernel/tests/conformance_surface_coverage.rs b/ferrolearn-kernel/tests/conformance_surface_coverage.rs index ea7b1d0b..c11ee7c1 100644 --- a/ferrolearn-kernel/tests/conformance_surface_coverage.rs +++ b/ferrolearn-kernel/tests/conformance_surface_coverage.rs @@ -5,7 +5,7 @@ //! 
exclusion OR mentioned by leaf name in at least one test source file. //! Modeled on ferrolearn-linear/tests/conformance_surface_coverage.rs. -use ferrolearn_test_oracle::{assert_surface_covered, SurfaceExclusions, SurfaceInventory}; +use ferrolearn_test_oracle::{SurfaceExclusions, SurfaceInventory, assert_surface_covered}; use std::path::{Path, PathBuf}; fn crate_root() -> PathBuf { @@ -18,8 +18,12 @@ fn test_dir() -> PathBuf { #[test] fn surface_coverage_gate() { - let inv_path = test_dir().join("conformance").join("_surface_inventory.toml"); - let exc_path = test_dir().join("conformance").join("_surface_exclusions.toml"); + let inv_path = test_dir() + .join("conformance") + .join("_surface_inventory.toml"); + let exc_path = test_dir() + .join("conformance") + .join("_surface_exclusions.toml"); let inventory = SurfaceInventory::load(&inv_path); let exclusions = SurfaceExclusions::load(&exc_path); if inventory.items.is_empty() { @@ -39,10 +43,7 @@ fn surface_coverage_gate() { } v }; - let paths: Vec<&Path> = candidate_test_files - .iter() - .map(PathBuf::as_path) - .collect(); + let paths: Vec<&Path> = candidate_test_files.iter().map(PathBuf::as_path).collect(); assert!( !paths.is_empty(), "no .rs test files found under {}", diff --git a/ferrolearn-kernel/tests/conformance_wave5.rs b/ferrolearn-kernel/tests/conformance_wave5.rs index 2a1d3cb8..7744a3dc 100644 --- a/ferrolearn-kernel/tests/conformance_wave5.rs +++ b/ferrolearn-kernel/tests/conformance_wave5.rs @@ -60,12 +60,13 @@ fn conformance_gaussian_process_classifier() { .iter() .map(|v| v.as_u64().unwrap() as usize) .collect(); - let matches = preds.iter().zip(expected_preds.iter()).filter(|(a, e)| a == e).count(); + let matches = preds + .iter() + .zip(expected_preds.iter()) + .filter(|(a, e)| a == e) + .count(); let acc = matches as f64 / preds.len() as f64; - assert!( - acc >= 0.85, - "GP classifier accuracy {acc:.4} < 0.85 floor" - ); + assert!(acc >= 0.85, "GP classifier accuracy {acc:.4} < 0.85 floor"); 
} #[test] diff --git a/ferrolearn-linear/src/ard.rs b/ferrolearn-linear/src/ard.rs index 5b3bb6eb..00b3a714 100644 --- a/ferrolearn-linear/src/ard.rs +++ b/ferrolearn-linear/src/ard.rs @@ -292,11 +292,7 @@ impl Fit, - y: &Array1, - ) -> Result, FerroError> { + fn fit(&self, x: &Array2, y: &Array1) -> Result, FerroError> { let (n_samples, n_features) = x.dim(); if n_samples != y.len() { @@ -350,9 +346,8 @@ impl Fit = Array1::from_shape_fn(n_features, |i| { - F::one() - lambda[i] * sd_new[i] - }); + let gamma: Array1 = + Array1::from_shape_fn(n_features, |i| F::one() - lambda[i] * sd_new[i]); let gamma_sum: F = gamma.iter().fold(F::zero(), |a, &b| a + b); @@ -612,7 +607,9 @@ mod tests { // y depends only on x1, x2 is noise-free irrelevant. let x = Array2::from_shape_vec( (6, 2), - vec![1.0, 100.0, 2.0, 200.0, 3.0, 300.0, 4.0, 400.0, 5.0, 500.0, 6.0, 600.0], + vec![ + 1.0, 100.0, 2.0, 200.0, 3.0, 300.0, 4.0, 400.0, 5.0, 500.0, 6.0, 600.0, + ], ) .unwrap(); let y = array![2.0, 4.0, 6.0, 8.0, 10.0, 12.0]; // y = 2 * x1 diff --git a/ferrolearn-linear/src/glm.rs b/ferrolearn-linear/src/glm.rs index 9cd10e53..bb78dd53 100644 --- a/ferrolearn-linear/src/glm.rs +++ b/ferrolearn-linear/src/glm.rs @@ -628,7 +628,9 @@ fn fit_glm_irls::new(GLMFamily::Poisson) - .fit(&x, &y) - .is_err()); + assert!( + GLMRegressor::::new(GLMFamily::Poisson) + .fit(&x, &y) + .is_err() + ); } #[test] fn test_glm_negative_alpha() { let x = Array2::from_shape_vec((3, 1), vec![1.0, 2.0, 3.0]).unwrap(); let y = array![1.0, 2.0, 3.0]; - assert!(GLMRegressor::::new(GLMFamily::Poisson) - .with_alpha(-1.0) - .fit(&x, &y) - .is_err()); + assert!( + GLMRegressor::::new(GLMFamily::Poisson) + .with_alpha(-1.0) + .fit(&x, &y) + .is_err() + ); } #[test] @@ -1160,8 +1166,10 @@ mod tests { fn test_glm_negative_y() { let x = Array2::from_shape_vec((3, 1), vec![1.0, 2.0, 3.0]).unwrap(); let y = array![1.0, -2.0, 3.0]; - assert!(GLMRegressor::::new(GLMFamily::Poisson) - .fit(&x, &y) - .is_err()); + assert!( + 
GLMRegressor::::new(GLMFamily::Poisson) + .fit(&x, &y) + .is_err() + ); } } diff --git a/ferrolearn-linear/src/lars.rs b/ferrolearn-linear/src/lars.rs index e06664ac..92d70142 100644 --- a/ferrolearn-linear/src/lars.rs +++ b/ferrolearn-linear/src/lars.rs @@ -216,8 +216,7 @@ fn ols_active( let xtx = xat.dot(&xa); let xty = xat.dot(y); - let w_active = cholesky_solve(&xtx, &xty) - .or_else(|_| gaussian_solve(k, &xtx, &xty))?; + let w_active = cholesky_solve(&xtx, &xty).or_else(|_| gaussian_solve(k, &xtx, &xty))?; // Scatter into full-length vector. let mut w = Array1::::zeros(n_features); @@ -437,14 +436,11 @@ impl Fit n_features { return Err(FerroError::InvalidParameter { name: "n_nonzero_coefs".into(), - reason: format!( - "cannot exceed number of features ({n_features})" - ), + reason: format!("cannot exceed number of features ({n_features})"), }); } - let (x_work, y_work, x_mean, y_mean) = - center_data(x, y, self.fit_intercept)?; + let (x_work, y_work, x_mean, y_mean) = center_data(x, y, self.fit_intercept)?; let w = lars_path(&x_work, &y_work, max_active, false)?; let intercept = compute_intercept(&x_mean, &y_mean, &w); @@ -603,10 +599,7 @@ fn lars_path( continue; } let a_j = x.column(j).dot(&u_vec); - let cands = [ - (c_max - corr[j], a_a - a_j), - (c_max + corr[j], a_a + a_j), - ]; + let cands = [(c_max - corr[j], a_a - a_j), (c_max + corr[j], a_a + a_j)]; for (num, den) in cands { if den.abs() <= eps { continue; @@ -706,8 +699,7 @@ impl Fit = Vec::new(); let mut in_active = vec![false; n_features]; @@ -750,9 +742,7 @@ impl Fit::new().with_n_nonzero_coefs(1).fit(&x, &y).unwrap(); + let fitted = Lars::::new() + .with_n_nonzero_coefs(1) + .fit(&x, &y) + .unwrap(); let nonzero = fitted .coefficients() .iter() @@ -992,7 +984,12 @@ mod tests { fn test_lars_n_nonzero_exceeds_features() { let x = Array2::from_shape_vec((3, 2), vec![1.0, 0.0, 2.0, 0.0, 3.0, 0.0]).unwrap(); let y = array![1.0, 2.0, 3.0]; - assert!(Lars::::new().with_n_nonzero_coefs(5).fit(&x, 
&y).is_err()); + assert!( + Lars::::new() + .with_n_nonzero_coefs(5) + .fit(&x, &y) + .is_err() + ); } #[test] @@ -1051,10 +1048,7 @@ mod tests { let x = Array2::from_shape_vec((5, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0]).unwrap(); let y = array![3.0, 5.0, 7.0, 9.0, 11.0]; - let fitted = LassoLars::::new() - .with_alpha(0.0) - .fit(&x, &y) - .unwrap(); + let fitted = LassoLars::::new().with_alpha(0.0).fit(&x, &y).unwrap(); assert_relative_eq!(fitted.coefficients()[0], 2.0, epsilon = 0.1); } @@ -1064,18 +1058,14 @@ mod tests { let x = Array2::from_shape_vec( (10, 3), vec![ - 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 0.0, 0.0, 4.0, 0.0, 0.0, - 5.0, 0.0, 0.0, 6.0, 0.0, 0.0, 7.0, 0.0, 0.0, 8.0, 0.0, 0.0, - 9.0, 0.0, 0.0, 10.0, 0.0, 0.0, + 1.0, 0.0, 0.0, 2.0, 0.0, 0.0, 3.0, 0.0, 0.0, 4.0, 0.0, 0.0, 5.0, 0.0, 0.0, 6.0, + 0.0, 0.0, 7.0, 0.0, 0.0, 8.0, 0.0, 0.0, 9.0, 0.0, 0.0, 10.0, 0.0, 0.0, ], ) .unwrap(); let y = array![2.0, 4.0, 6.0, 8.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0]; - let fitted = LassoLars::::new() - .with_alpha(5.0) - .fit(&x, &y) - .unwrap(); + let fitted = LassoLars::::new().with_alpha(5.0).fit(&x, &y).unwrap(); // Irrelevant features (all-zero) should not enter. 
assert_relative_eq!(fitted.coefficients()[1], 0.0, epsilon = 1e-10); assert_relative_eq!(fitted.coefficients()[2], 0.0, epsilon = 1e-10); @@ -1085,7 +1075,12 @@ mod tests { fn test_lasso_lars_negative_alpha() { let x = Array2::from_shape_vec((3, 1), vec![1.0, 2.0, 3.0]).unwrap(); let y = array![1.0, 2.0, 3.0]; - assert!(LassoLars::::new().with_alpha(-1.0).fit(&x, &y).is_err()); + assert!( + LassoLars::::new() + .with_alpha(-1.0) + .fit(&x, &y) + .is_err() + ); } #[test] @@ -1099,7 +1094,10 @@ mod tests { fn test_lasso_lars_predict() { let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 3.0, 4.0]).unwrap(); let y = array![2.0, 4.0, 6.0, 8.0]; - let fitted = LassoLars::::new().with_alpha(0.01).fit(&x, &y).unwrap(); + let fitted = LassoLars::::new() + .with_alpha(0.01) + .fit(&x, &y) + .unwrap(); let preds = fitted.predict(&x).unwrap(); assert_eq!(preds.len(), 4); } @@ -1108,7 +1106,10 @@ mod tests { fn test_lasso_lars_has_coefficients() { let x = Array2::from_shape_vec((3, 2), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap(); let y = array![1.0, 2.0, 3.0]; - let fitted = LassoLars::::new().with_alpha(0.01).fit(&x, &y).unwrap(); + let fitted = LassoLars::::new() + .with_alpha(0.01) + .fit(&x, &y) + .unwrap(); assert_eq!(fitted.coefficients().len(), 2); } diff --git a/ferrolearn-linear/src/lda.rs b/ferrolearn-linear/src/lda.rs index e81f7fa6..171d3dec 100644 --- a/ferrolearn-linear/src/lda.rs +++ b/ferrolearn-linear/src/lda.rs @@ -154,6 +154,7 @@ impl FittedLDA { /// /// Returns [`FerroError::ShapeMismatch`] if the number of features /// does not match the model. 
+ #[allow(clippy::needless_range_loop)] // index-by-class loop is natural for the softmax row write pub fn predict_proba(&self, x: &Array2) -> Result, FerroError> { let projected = self.transform(x)?; let n_samples = projected.nrows(); diff --git a/ferrolearn-linear/src/lib.rs b/ferrolearn-linear/src/lib.rs index ba5dfd8a..f9bb6072 100644 --- a/ferrolearn-linear/src/lib.rs +++ b/ferrolearn-linear/src/lib.rs @@ -100,8 +100,8 @@ pub use ridge_classifier::{FittedRidgeClassifier, RidgeClassifier}; pub use ridge_cv::{FittedRidgeCV, RidgeCV}; pub use sgd::{FittedSGDClassifier, FittedSGDRegressor, SGDClassifier, SGDRegressor}; pub use svm::{ - FittedSVC, FittedSVR, Kernel, LinearKernel, PolynomialKernel, RbfKernel, SigmoidKernel, SVC, - SVR, + FittedSVC, FittedSVR, Kernel, LinearKernel, PolynomialKernel, RbfKernel, SVC, SVR, + SigmoidKernel, }; use ferrolearn_core::error::FerroError; diff --git a/ferrolearn-linear/src/linear_svc.rs b/ferrolearn-linear/src/linear_svc.rs index 67b898e6..620f4a7f 100644 --- a/ferrolearn-linear/src/linear_svc.rs +++ b/ferrolearn-linear/src/linear_svc.rs @@ -245,8 +245,7 @@ fn solve_binary_primal( hess = hess + c / n_f * xij * xij; } LinearSVCLoss::SquaredHinge => { - grad = grad - two * c / n_f - * (F::one() - margin) * y_signed[i] * xij; + grad = grad - two * c / n_f * (F::one() - margin) * y_signed[i] * xij; hess = hess + two * c / n_f * xij * xij; } } @@ -282,8 +281,7 @@ fn solve_binary_primal( hess_b = hess_b + c / n_f; } LinearSVCLoss::SquaredHinge => { - grad_b = grad_b - two * c / n_f - * (F::one() - margin) * y_signed[i]; + grad_b = grad_b - two * c / n_f * (F::one() - margin) * y_signed[i]; hess_b = hess_b + two * c / n_f; } } @@ -321,11 +319,7 @@ impl Fit, Array1, - y: &Array1, - ) -> Result, FerroError> { + fn fit(&self, x: &Array2, y: &Array1) -> Result, FerroError> { let (n_samples, n_features) = x.dim(); if n_samples != y.len() { @@ -365,7 +359,8 @@ impl Fit, Array1 Fit, Array1 = y.mapv(|label| { - if label == cls { - F::one() - 
} else { - -F::one() - } - }); + let y_signed: Array1 = + y.mapv(|label| if label == cls { F::one() } else { -F::one() }); let (w, b) = solve_binary_primal(x, &y_signed, self.c, self.max_iter, self.tol, self.loss); @@ -405,9 +395,7 @@ impl Fit, Array1 Predict> - for FittedLinearSVC -{ +impl Predict> for FittedLinearSVC { type Output = Array1; type Error = FerroError; @@ -462,9 +450,7 @@ impl Predict> } } -impl HasCoefficients - for FittedLinearSVC -{ +impl HasCoefficients for FittedLinearSVC { /// Returns the coefficient vector of the first (or only) binary sub-problem. fn coefficients(&self) -> &Array1 { &self.weight_vectors[0] @@ -516,8 +502,7 @@ mod tests { let x = Array2::from_shape_vec( (8, 2), vec![ - 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, - 8.0, 8.0, 8.0, 9.0, 9.0, 8.0, 9.0, 9.0, + 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 8.0, 8.0, 8.0, 9.0, 9.0, 8.0, 9.0, 9.0, ], ) .unwrap(); @@ -555,9 +540,8 @@ mod tests { let x = Array2::from_shape_vec( (9, 2), vec![ - 0.0, 0.0, 0.5, 0.0, 0.0, 0.5, - 10.0, 0.0, 10.5, 0.0, 10.0, 0.5, - 0.0, 10.0, 0.5, 10.0, 0.0, 10.5, + 0.0, 0.0, 0.5, 0.0, 0.0, 0.5, 10.0, 0.0, 10.5, 0.0, 10.0, 0.5, 0.0, 10.0, 0.5, + 10.0, 0.0, 10.5, ], ) .unwrap(); @@ -627,7 +611,10 @@ mod tests { .unwrap(); let y = array![0, 0, 0, 1, 1, 1]; - let fitted = LinearSVC::::new().with_max_iter(5000).fit(&x, &y).unwrap(); + let fitted = LinearSVC::::new() + .with_max_iter(5000) + .fit(&x, &y) + .unwrap(); let x_bad = Array2::from_shape_vec((3, 1), vec![1.0, 2.0, 3.0]).unwrap(); assert!(fitted.predict(&x_bad).is_err()); diff --git a/ferrolearn-linear/src/linear_svr.rs b/ferrolearn-linear/src/linear_svr.rs index fc933be1..1563acd6 100644 --- a/ferrolearn-linear/src/linear_svr.rs +++ b/ferrolearn-linear/src/linear_svr.rs @@ -128,9 +128,7 @@ pub struct FittedLinearSVR { intercept: F, } -impl Fit, Array1> - for LinearSVR -{ +impl Fit, Array1> for LinearSVR { type Fitted = FittedLinearSVR; type Error = FerroError; @@ -141,11 +139,7 @@ impl Fit, Array1> /// - 
[`FerroError::ShapeMismatch`] — sample count mismatch. /// - [`FerroError::InvalidParameter`] — `C` not positive or epsilon negative. /// - [`FerroError::InsufficientSamples`] — no samples provided. - fn fit( - &self, - x: &Array2, - y: &Array1, - ) -> Result, FerroError> { + fn fit(&self, x: &Array2, y: &Array1) -> Result, FerroError> { let (n_samples, n_features) = x.dim(); if n_samples != y.len() { @@ -280,9 +274,7 @@ impl Fit, Array1> } } -impl Predict> - for FittedLinearSVR -{ +impl Predict> for FittedLinearSVR { type Output = Array1; type Error = FerroError; @@ -309,9 +301,7 @@ impl Predict> } } -impl HasCoefficients - for FittedLinearSVR -{ +impl HasCoefficients for FittedLinearSVR { fn coefficients(&self) -> &Array1 { &self.coefficients } @@ -388,7 +378,10 @@ mod tests { // Should roughly recover y = 2x. for (p, &t) in preds.iter().zip(y.iter()) { - assert!((p - t).abs() < 3.0, "prediction {p} too far from target {t}"); + assert!( + (p - t).abs() < 3.0, + "prediction {p} too far from target {t}" + ); } } @@ -450,8 +443,8 @@ mod tests { #[test] fn test_has_coefficients() { - let x = Array2::from_shape_vec((4, 2), vec![1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 4.0, 0.0]) - .unwrap(); + let x = + Array2::from_shape_vec((4, 2), vec![1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 4.0, 0.0]).unwrap(); let y = array![1.0, 2.0, 3.0, 4.0]; let fitted = LinearSVR::::new() diff --git a/ferrolearn-linear/src/logistic_regression_cv.rs b/ferrolearn-linear/src/logistic_regression_cv.rs index 0a11c4fc..17389784 100644 --- a/ferrolearn-linear/src/logistic_regression_cv.rs +++ b/ferrolearn-linear/src/logistic_regression_cv.rs @@ -187,11 +187,7 @@ impl FittedLogisticRe /// `StratifiedKFold` (#346). /// /// Returns `(train_indices, test_indices)` for fold number `fold`. -fn stratified_kfold_split( - y: &Array1, - k: usize, - fold: usize, -) -> (Vec, Vec) { +fn stratified_kfold_split(y: &Array1, k: usize, fold: usize) -> (Vec, Vec) { // Group sample indices by class. 
let mut classes: Vec = y.iter().copied().collect(); classes.sort_unstable(); @@ -458,8 +454,8 @@ mod tests { let x = Array2::from_shape_vec( (12, 2), vec![ - 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 1.5, 1.5, 1.0, 1.8, - 8.0, 8.0, 8.0, 9.0, 9.0, 8.0, 9.0, 9.0, 8.5, 8.5, 8.0, 8.8, + 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 1.5, 1.5, 1.0, 1.8, 8.0, 8.0, 8.0, 9.0, + 9.0, 8.0, 9.0, 9.0, 8.5, 8.5, 8.0, 8.8, ], ) .unwrap(); @@ -484,8 +480,7 @@ mod tests { let x = Array2::from_shape_vec( (12, 2), vec![ - 0.0, 0.0, 0.5, 0.0, 0.0, 0.5, 0.5, 0.5, - 10.0, 0.0, 10.5, 0.0, 10.0, 0.5, 10.5, 0.5, + 0.0, 0.0, 0.5, 0.0, 0.0, 0.5, 0.5, 0.5, 10.0, 0.0, 10.5, 0.0, 10.0, 0.5, 10.5, 0.5, 0.0, 10.0, 0.5, 10.0, 0.0, 10.5, 0.5, 10.5, ], ) @@ -509,8 +504,8 @@ mod tests { let x = Array2::from_shape_vec( (10, 2), vec![ - 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 1.5, 1.5, - 8.0, 8.0, 8.0, 9.0, 9.0, 8.0, 9.0, 9.0, 8.5, 8.5, + 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 1.5, 1.5, 8.0, 8.0, 8.0, 9.0, 9.0, 8.0, + 9.0, 9.0, 8.5, 8.5, ], ) .unwrap(); @@ -568,8 +563,8 @@ mod tests { let x = Array2::from_shape_vec( (10, 2), vec![ - 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 1.5, 1.5, - 8.0, 8.0, 8.0, 9.0, 9.0, 8.0, 9.0, 9.0, 8.5, 8.5, + 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 1.5, 1.5, 8.0, 8.0, 8.0, 9.0, 9.0, 8.0, + 9.0, 9.0, 8.5, 8.5, ], ) .unwrap(); diff --git a/ferrolearn-linear/src/omp.rs b/ferrolearn-linear/src/omp.rs index c8f97cec..93c96600 100644 --- a/ferrolearn-linear/src/omp.rs +++ b/ferrolearn-linear/src/omp.rs @@ -251,8 +251,7 @@ fn ols_active( let xtx = xat.dot(&xa); let xty = xat.dot(y); - let w_active = - cholesky_solve(&xtx, &xty).or_else(|_| gaussian_solve(k, &xtx, &xty))?; + let w_active = cholesky_solve(&xtx, &xty).or_else(|_| gaussian_solve(k, &xtx, &xty))?; let mut w = Array1::::zeros(n_features); for (col_idx, &j) in support.iter().enumerate() { @@ -309,18 +308,13 @@ impl Fit n_features { return Err(FerroError::InvalidParameter { name: "n_nonzero_coefs".into(), - reason: format!( - 
"cannot exceed number of features ({n_features})" - ), + reason: format!("cannot exceed number of features ({n_features})"), }); } } @@ -492,10 +486,12 @@ mod tests { fn test_shape_mismatch() { let x = Array2::from_shape_vec((3, 1), vec![1.0, 2.0, 3.0]).unwrap(); let y = array![1.0, 2.0]; - assert!(OrthogonalMatchingPursuit::::new() - .with_n_nonzero_coefs(1) - .fit(&x, &y) - .is_err()); + assert!( + OrthogonalMatchingPursuit::::new() + .with_n_nonzero_coefs(1) + .fit(&x, &y) + .is_err() + ); } #[test] @@ -509,10 +505,12 @@ mod tests { fn test_n_nonzero_exceeds_features() { let x = Array2::from_shape_vec((3, 2), vec![1.0, 0.0, 2.0, 0.0, 3.0, 0.0]).unwrap(); let y = array![1.0, 2.0, 3.0]; - assert!(OrthogonalMatchingPursuit::::new() - .with_n_nonzero_coefs(5) - .fit(&x, &y) - .is_err()); + assert!( + OrthogonalMatchingPursuit::::new() + .with_n_nonzero_coefs(5) + .fit(&x, &y) + .is_err() + ); } #[test] @@ -534,9 +532,8 @@ mod tests { let x = Array2::from_shape_vec( (10, 3), vec![ - 1.0, 0.1, 0.01, 2.0, 0.2, 0.02, 3.0, 0.3, 0.03, 4.0, 0.4, 0.04, - 5.0, 0.5, 0.05, 6.0, 0.6, 0.06, 7.0, 0.7, 0.07, 8.0, 0.8, 0.08, - 9.0, 0.9, 0.09, 10.0, 1.0, 0.10, + 1.0, 0.1, 0.01, 2.0, 0.2, 0.02, 3.0, 0.3, 0.03, 4.0, 0.4, 0.04, 5.0, 0.5, 0.05, + 6.0, 0.6, 0.06, 7.0, 0.7, 0.07, 8.0, 0.8, 0.08, 9.0, 0.9, 0.09, 10.0, 1.0, 0.10, ], ) .unwrap(); diff --git a/ferrolearn-linear/src/qda.rs b/ferrolearn-linear/src/qda.rs index 433870f3..4d78c796 100644 --- a/ferrolearn-linear/src/qda.rs +++ b/ferrolearn-linear/src/qda.rs @@ -241,10 +241,7 @@ fn cholesky_inv_and_logdet( // Log-determinant: log|A| = 2 * sum(log(diag(L))). let two = F::from(2.0).unwrap(); - let log_det = (0..n) - .map(|i| l[[i, i]].ln()) - .fold(F::zero(), |a, b| a + b) - * two; + let log_det = (0..n).map(|i| l[[i, i]].ln()).fold(F::zero(), |a, b| a + b) * two; // Compute L^{-1} by forward substitution on each column of I. 
let mut l_inv = Array2::::zeros((n, n)); @@ -265,9 +262,7 @@ fn cholesky_inv_and_logdet( Ok((a_inv, log_det)) } -impl Fit, Array1> - for QDA -{ +impl Fit, Array1> for QDA { type Fitted = FittedQDA; type Error = FerroError; @@ -280,11 +275,7 @@ impl Fit, Array1, - y: &Array1, - ) -> Result, FerroError> { + fn fit(&self, x: &Array2, y: &Array1) -> Result, FerroError> { let (n_samples, n_features) = x.dim(); if n_samples != y.len() { @@ -392,9 +383,7 @@ impl Fit, Array1 Predict> - for FittedQDA -{ +impl Predict> for FittedQDA { type Output = Array1; type Error = FerroError; @@ -476,8 +465,7 @@ mod tests { let x = Array2::from_shape_vec( (8, 2), vec![ - 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, - 8.0, 8.0, 8.0, 9.0, 9.0, 8.0, 9.0, 9.0, + 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 8.0, 8.0, 8.0, 9.0, 9.0, 8.0, 9.0, 9.0, ], ) .unwrap(); @@ -496,8 +484,7 @@ mod tests { let x = Array2::from_shape_vec( (12, 2), vec![ - 0.0, 0.0, 0.5, 0.0, 0.0, 0.5, 0.5, 0.5, - 10.0, 0.0, 10.5, 0.0, 10.0, 0.5, 10.5, 0.5, + 0.0, 0.0, 0.5, 0.0, 0.0, 0.5, 0.5, 0.5, 10.0, 0.0, 10.5, 0.0, 10.0, 0.5, 10.5, 0.5, 0.0, 10.0, 0.5, 10.0, 0.0, 10.5, 0.5, 10.5, ], ) @@ -520,8 +507,7 @@ mod tests { let x = Array2::from_shape_vec( (8, 2), vec![ - 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, - 8.0, 8.0, 8.0, 9.0, 9.0, 8.0, 9.0, 9.0, + 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 8.0, 8.0, 8.0, 9.0, 9.0, 8.0, 9.0, 9.0, ], ) .unwrap(); @@ -569,8 +555,7 @@ mod tests { let x = Array2::from_shape_vec( (8, 2), vec![ - 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, - 8.0, 8.0, 8.0, 9.0, 9.0, 8.0, 9.0, 9.0, + 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 8.0, 8.0, 8.0, 9.0, 9.0, 8.0, 9.0, 9.0, ], ) .unwrap(); @@ -587,8 +572,7 @@ mod tests { let x = Array2::from_shape_vec( (8, 2), vec![ - 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, - 8.0, 8.0, 8.0, 9.0, 9.0, 8.0, 9.0, 9.0, + 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 8.0, 8.0, 8.0, 9.0, 9.0, 8.0, 9.0, 9.0, ], ) .unwrap(); @@ -601,11 +585,7 @@ mod tests { #[test] fn test_means() { - let x = 
Array2::from_shape_vec( - (4, 1), - vec![1.0, 2.0, 5.0, 6.0], - ) - .unwrap(); + let x = Array2::from_shape_vec((4, 1), vec![1.0, 2.0, 5.0, 6.0]).unwrap(); let y = array![0, 0, 1, 1]; let fitted = QDA::::new().with_reg_param(0.1).fit(&x, &y).unwrap(); @@ -615,11 +595,7 @@ mod tests { #[test] fn test_class_with_too_few_samples() { - let x = Array2::from_shape_vec( - (3, 1), - vec![1.0, 5.0, 6.0], - ) - .unwrap(); + let x = Array2::from_shape_vec((3, 1), vec![1.0, 5.0, 6.0]).unwrap(); let y = array![0, 1, 1]; // class 0 has only 1 sample let model = QDA::::new(); diff --git a/ferrolearn-linear/src/quantile_regressor.rs b/ferrolearn-linear/src/quantile_regressor.rs index a68e8db2..959e0900 100644 --- a/ferrolearn-linear/src/quantile_regressor.rs +++ b/ferrolearn-linear/src/quantile_regressor.rs @@ -314,11 +314,7 @@ impl Fit, - y: &Array1, - ) -> Result, FerroError> { + fn fit(&self, x: &Array2, y: &Array1) -> Result, FerroError> { let (n_samples, n_features) = x.dim(); if n_samples != y.len() { @@ -401,7 +397,11 @@ impl Fit= F::zero() { q } else { one - q }; + let asym = if residuals[i] >= F::zero() { + q + } else { + one - q + }; weights[i] = asym / abs_r; } @@ -543,30 +543,36 @@ mod tests { fn test_invalid_quantile_zero() { let x = Array2::from_shape_vec((3, 1), vec![1.0, 2.0, 3.0]).unwrap(); let y = array![1.0, 2.0, 3.0]; - assert!(QuantileRegressor::::new() - .with_quantile(0.0) - .fit(&x, &y) - .is_err()); + assert!( + QuantileRegressor::::new() + .with_quantile(0.0) + .fit(&x, &y) + .is_err() + ); } #[test] fn test_invalid_quantile_one() { let x = Array2::from_shape_vec((3, 1), vec![1.0, 2.0, 3.0]).unwrap(); let y = array![1.0, 2.0, 3.0]; - assert!(QuantileRegressor::::new() - .with_quantile(1.0) - .fit(&x, &y) - .is_err()); + assert!( + QuantileRegressor::::new() + .with_quantile(1.0) + .fit(&x, &y) + .is_err() + ); } #[test] fn test_negative_alpha() { let x = Array2::from_shape_vec((3, 1), vec![1.0, 2.0, 3.0]).unwrap(); let y = array![1.0, 2.0, 3.0]; - 
assert!(QuantileRegressor::::new() - .with_alpha(-1.0) - .fit(&x, &y) - .is_err()); + assert!( + QuantileRegressor::::new() + .with_alpha(-1.0) + .fit(&x, &y) + .is_err() + ); } #[test] diff --git a/ferrolearn-linear/src/ridge_classifier.rs b/ferrolearn-linear/src/ridge_classifier.rs index c3fdf065..208ccf93 100644 --- a/ferrolearn-linear/src/ridge_classifier.rs +++ b/ferrolearn-linear/src/ridge_classifier.rs @@ -148,8 +148,8 @@ impl FittedRidgeClass } } -impl - Fit, Array1> for RidgeClassifier +impl Fit, Array1> + for RidgeClassifier { type Fitted = FittedRidgeClassifier; type Error = FerroError; @@ -234,11 +234,12 @@ impl .ok_or_else(|| FerroError::NumericalInstability { message: "failed to compute column means".into(), })?; - let y_mean = y_indicator - .mean_axis(Axis(0)) - .ok_or_else(|| FerroError::NumericalInstability { - message: "failed to compute target means".into(), - })?; + let y_mean = + y_indicator + .mean_axis(Axis(0)) + .ok_or_else(|| FerroError::NumericalInstability { + message: "failed to compute target means".into(), + })?; let x_c = x - &x_mean; let y_c = &y_indicator - &y_mean; (x_c, y_c, Some(x_mean), Some(y_mean)) @@ -383,8 +384,7 @@ mod tests { let x = Array2::from_shape_vec( (8, 2), vec![ - 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, - 8.0, 8.0, 8.0, 9.0, 9.0, 8.0, 9.0, 9.0, + 1.0, 1.0, 1.0, 2.0, 2.0, 1.0, 2.0, 2.0, 8.0, 8.0, 8.0, 9.0, 9.0, 8.0, 9.0, 9.0, ], ) .unwrap(); @@ -403,9 +403,8 @@ mod tests { let x = Array2::from_shape_vec( (9, 2), vec![ - 0.0, 0.0, 0.5, 0.0, 0.0, 0.5, - 10.0, 0.0, 10.5, 0.0, 10.0, 0.5, - 0.0, 10.0, 0.5, 10.0, 0.0, 10.5, + 0.0, 0.0, 0.5, 0.0, 0.0, 0.5, 10.0, 0.0, 10.5, 0.0, 10.0, 0.5, 0.0, 10.0, 0.5, + 10.0, 0.0, 10.5, ], ) .unwrap(); diff --git a/ferrolearn-linear/tests/api_proof.rs b/ferrolearn-linear/tests/api_proof.rs index f995b1fd..ddef5d31 100644 --- a/ferrolearn-linear/tests/api_proof.rs +++ b/ferrolearn-linear/tests/api_proof.rs @@ -15,18 +15,17 @@ use approx::assert_relative_eq; use 
ferrolearn_core::introspection::HasClasses; -use ferrolearn_core::traits::{Fit, Predict, Transform}; +use ferrolearn_core::traits::{Fit, Predict}; use ferrolearn_linear::sgd::{ClassifierLoss, LearningRateSchedule, RegressorLoss}; +use ferrolearn_linear::svm::{LinearKernel, RbfKernel}; use ferrolearn_linear::{ ARDRegression, BayesianRidge, ClassifierScore, ElasticNet, ElasticNetCV, GLMFamily, GLMRegressor, GammaRegressor, HuberRegressor, IsotonicRegression, LDA, Lars, Lasso, LassoCV, LassoLars, LinearRegression, LinearSVC, LinearSVCLoss, LinearSVR, LinearSVRLoss, - LogisticRegression, LogisticRegressionCV, NuSVC, NuSVR, OneClassSVM, - OrthogonalMatchingPursuit, PoissonRegressor, QDA, QuantileRegressor, RANSACRegressor, - RegressorScore, Ridge, RidgeCV, RidgeClassifier, SGDClassifier, SGDRegressor, SVC, SVR, - TweedieRegressor, + LogisticRegression, LogisticRegressionCV, NuSVC, NuSVR, OneClassSVM, OrthogonalMatchingPursuit, + PoissonRegressor, QDA, QuantileRegressor, RANSACRegressor, RegressorScore, Ridge, RidgeCV, + RidgeClassifier, SGDClassifier, SGDRegressor, SVC, SVR, TweedieRegressor, }; -use ferrolearn_linear::svm::{LinearKernel, RbfKernel}; use ndarray::{Array1, Array2, array}; /// Two well-separated clusters in 2D (for binary classification). 
@@ -143,30 +142,49 @@ fn api_proof_elastic_net_family() { #[test] fn api_proof_bayesian_ridge_and_ard() { let (x, y) = regression_data(); - let f = BayesianRidge::::new().with_max_iter(50).fit(&x, &y).unwrap(); + let f = BayesianRidge::::new() + .with_max_iter(50) + .fit(&x, &y) + .unwrap(); let _ = f.score(&x, &y).unwrap(); - let f2 = ARDRegression::::new().with_max_iter(50).fit(&x, &y).unwrap(); + let f2 = ARDRegression::::new() + .with_max_iter(50) + .fit(&x, &y) + .unwrap(); let _ = f2.score(&x, &y).unwrap(); } #[test] fn api_proof_huber_and_quantile() { let (x, y) = regression_data(); - let h = HuberRegressor::::new().with_epsilon(1.35).with_alpha(1e-4).fit(&x, &y).unwrap(); + let h = HuberRegressor::::new() + .with_epsilon(1.35) + .with_alpha(1e-4) + .fit(&x, &y) + .unwrap(); let _ = h.score(&x, &y).unwrap(); - let q = QuantileRegressor::::new().with_quantile(0.5).fit(&x, &y).unwrap(); + let q = QuantileRegressor::::new() + .with_quantile(0.5) + .fit(&x, &y) + .unwrap(); let _ = q.score(&x, &y).unwrap(); } #[test] fn api_proof_glm_family() { let (x, y) = positive_regression_data(); - let f = GLMRegressor::::new(GLMFamily::Poisson).with_alpha(0.1).fit(&x, &y).unwrap(); + let f = GLMRegressor::::new(GLMFamily::Poisson) + .with_alpha(0.1) + .fit(&x, &y) + .unwrap(); let _ = f.score(&x, &y).unwrap(); let _ = PoissonRegressor::::new().fit(&x, &y).unwrap(); let _ = GammaRegressor::::new().fit(&x, &y).unwrap(); - let _ = TweedieRegressor::::new().with_power(1.5).fit(&x, &y).unwrap(); + let _ = TweedieRegressor::::new() + .with_power(1.5) + .fit(&x, &y) + .unwrap(); // Family enum coverage smoke for fam in [GLMFamily::Poisson, GLMFamily::Gamma] { @@ -177,9 +195,15 @@ fn api_proof_glm_family() { #[test] fn api_proof_lars_family() { let (x, y) = regression_data(); - let f = Lars::::new().with_n_nonzero_coefs(1).fit(&x, &y).unwrap(); + let f = Lars::::new() + .with_n_nonzero_coefs(1) + .fit(&x, &y) + .unwrap(); let _ = f.score(&x, &y).unwrap(); - let f2 = 
LassoLars::::new().with_alpha(0.01).fit(&x, &y).unwrap(); + let f2 = LassoLars::::new() + .with_alpha(0.01) + .fit(&x, &y) + .unwrap(); let _ = f2.score(&x, &y).unwrap(); } @@ -389,7 +413,9 @@ fn api_proof_kernel_svm_family() { let f = nusvr.fit(&xr, &yr).unwrap(); let _ = f.predict(&xr).unwrap(); - let ocsvm = OneClassSVM::new(LinearKernel).with_nu(0.5).with_max_iter(200); + let ocsvm = OneClassSVM::new(LinearKernel) + .with_nu(0.5) + .with_max_iter(200); let f = ocsvm.fit(&x, &()).unwrap(); let _ = f.predict(&x).unwrap(); let _ = f.decision_function(&x).unwrap(); @@ -403,7 +429,10 @@ fn api_proof_isotonic_regression() { let x = Array2::from_shape_vec((6, 1), vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap(); let y = array![1.0, 2.5, 2.0, 4.0, 5.5, 5.0]; - let f = IsotonicRegression::::new().with_increasing(true).fit(&x, &y).unwrap(); + let f = IsotonicRegression::::new() + .with_increasing(true) + .fit(&x, &y) + .unwrap(); let preds = f.predict(&x).unwrap(); assert_eq!(preds.len(), 6); let _ = f.score(&x, &y).unwrap(); diff --git a/ferrolearn-linear/tests/conformance_sklearn.rs b/ferrolearn-linear/tests/conformance_sklearn.rs index 510a3739..02e8194c 100644 --- a/ferrolearn-linear/tests/conformance_sklearn.rs +++ b/ferrolearn-linear/tests/conformance_sklearn.rs @@ -16,9 +16,9 @@ use ferrolearn_core::introspection::HasCoefficients; use ferrolearn_core::{Fit, Predict}; use ferrolearn_test_oracle::{ - assert_close, assert_close_slice, json_to_array1, json_to_array2, json_to_labels, - load_fixture, MIN_LOGISTIC_ACCURACY, TOL_ITERATIVE_LINEAR_ABS, TOL_ITERATIVE_LINEAR_REL, - TOL_LINEAR_FIT_ABS, TOL_LINEAR_FIT_REL, TOL_LOGISTIC_ABS, TOL_LOGISTIC_REL, + MIN_LOGISTIC_ACCURACY, TOL_ITERATIVE_LINEAR_ABS, TOL_ITERATIVE_LINEAR_REL, TOL_LINEAR_FIT_ABS, + TOL_LINEAR_FIT_REL, TOL_LOGISTIC_ABS, TOL_LOGISTIC_REL, assert_close, assert_close_slice, + json_to_array1, json_to_array2, json_to_labels, load_fixture, }; // 
--------------------------------------------------------------------------- @@ -33,8 +33,7 @@ fn conformance_linear_regression() { let (rel, abs) = fx.tolerance(TOL_LINEAR_FIT_REL, TOL_LINEAR_FIT_ABS); let fit_intercept = fx.params["fit_intercept"].as_bool().unwrap_or(true); - let model = - ferrolearn_linear::LinearRegression::::new().with_fit_intercept(fit_intercept); + let model = ferrolearn_linear::LinearRegression::::new().with_fit_intercept(fit_intercept); let fitted = model.fit(&x, &y).expect("LinearRegression fit"); let expected_coefs = json_to_array1(&fx.expected["coefficients"]); diff --git a/ferrolearn-linear/tests/conformance_surface_coverage.rs b/ferrolearn-linear/tests/conformance_surface_coverage.rs index 27fdbe8f..6ebd144d 100644 --- a/ferrolearn-linear/tests/conformance_surface_coverage.rs +++ b/ferrolearn-linear/tests/conformance_surface_coverage.rs @@ -9,7 +9,7 @@ //! it must fail the build. This pattern is borrowed from ferrotorch and //! ferray's surface-coverage gates. 
-use ferrolearn_test_oracle::{assert_surface_covered, SurfaceExclusions, SurfaceInventory}; +use ferrolearn_test_oracle::{SurfaceExclusions, SurfaceInventory, assert_surface_covered}; use std::path::{Path, PathBuf}; fn crate_root() -> PathBuf { @@ -22,8 +22,12 @@ fn test_dir() -> PathBuf { #[test] fn surface_coverage_gate() { - let inv_path = test_dir().join("conformance").join("_surface_inventory.toml"); - let exc_path = test_dir().join("conformance").join("_surface_exclusions.toml"); + let inv_path = test_dir() + .join("conformance") + .join("_surface_inventory.toml"); + let exc_path = test_dir() + .join("conformance") + .join("_surface_exclusions.toml"); let inventory = SurfaceInventory::load(&inv_path); let exclusions = SurfaceExclusions::load(&exc_path); assert!( diff --git a/ferrolearn-linear/tests/conformance_wave1.rs b/ferrolearn-linear/tests/conformance_wave1.rs index 576ec42c..792f90be 100644 --- a/ferrolearn-linear/tests/conformance_wave1.rs +++ b/ferrolearn-linear/tests/conformance_wave1.rs @@ -14,9 +14,8 @@ use ferrolearn_core::introspection::HasCoefficients; use ferrolearn_core::{Fit, Predict}; use ferrolearn_test_oracle::{ - assert_close, assert_close_slice, json_to_array1, json_to_array2, load_fixture, TOL_ITERATIVE_LINEAR_ABS, TOL_ITERATIVE_LINEAR_REL, TOL_LINEAR_FIT_ABS, TOL_LINEAR_FIT_REL, - TOL_LOGISTIC_ABS, TOL_LOGISTIC_REL, + assert_close, assert_close_slice, json_to_array1, json_to_array2, load_fixture, }; /// sklearn's `gamma="scale"` formula: `1 / (n_features * X.var())`. @@ -710,8 +709,7 @@ fn conformance_isotonic_regression() { // wrap as a single-feature matrix. 
let x_vec = json_to_array1(&fx.input["x"]); let n = x_vec.len(); - let x = - ndarray::Array2::from_shape_vec((n, 1), x_vec.as_slice().unwrap().to_vec()).unwrap(); + let x = ndarray::Array2::from_shape_vec((n, 1), x_vec.as_slice().unwrap().to_vec()).unwrap(); let y = json_to_array1(&fx.input["y"]); let (rel, abs) = fx.tolerance(1e-4, 1e-6); @@ -845,10 +843,7 @@ fn conformance_sgd_classifier() { .with_random_state(random_state); let fitted = model.fit(&x, &y).expect("SGDClassifier fit"); - let preds: Vec = fitted - .predict(&x) - .expect("SGDClassifier predict") - .to_vec(); + let preds: Vec = fitted.predict(&x).expect("SGDClassifier predict").to_vec(); let expected_classes: Vec = fx.expected["predicted_classes"] .as_array() .unwrap() @@ -894,7 +889,10 @@ fn conformance_sgd_regressor() { .map(|(&a, &e)| (a - e).powi(2)) .sum(); let r2 = 1.0 - ss_res / ss_tot; - assert!(r2 >= 0.85, "SGDRegressor R² with sklearn = {r2:.4}, floor 0.85"); + assert!( + r2 >= 0.85, + "SGDRegressor R² with sklearn = {r2:.4}, floor 0.85" + ); } // --------------------------------------------------------------------------- @@ -1045,7 +1043,10 @@ fn conformance_nu_svr_rbf() { // libsvm; with normalized y and gamma=scale they reach moderately similar // fits, but the nu-formulation has a free rho parameter that diverges // between the two implementations more than the C-formulation's bias does. - assert!(r2 >= 0.40, "NuSVR(RBF) R² with sklearn = {r2:.4}, floor 0.40"); + assert!( + r2 >= 0.40, + "NuSVR(RBF) R² with sklearn = {r2:.4}, floor 0.40" + ); } #[test] diff --git a/ferrolearn-metrics/tests/conformance_surface_coverage.rs b/ferrolearn-metrics/tests/conformance_surface_coverage.rs index 58fd0ab4..7bc742be 100644 --- a/ferrolearn-metrics/tests/conformance_surface_coverage.rs +++ b/ferrolearn-metrics/tests/conformance_surface_coverage.rs @@ -5,7 +5,7 @@ //! exclusion OR mentioned by leaf name in at least one test source file. //! 
Modeled on ferrolearn-linear/tests/conformance_surface_coverage.rs. -use ferrolearn_test_oracle::{assert_surface_covered, SurfaceExclusions, SurfaceInventory}; +use ferrolearn_test_oracle::{SurfaceExclusions, SurfaceInventory, assert_surface_covered}; use std::path::{Path, PathBuf}; fn crate_root() -> PathBuf { @@ -18,8 +18,12 @@ fn test_dir() -> PathBuf { #[test] fn surface_coverage_gate() { - let inv_path = test_dir().join("conformance").join("_surface_inventory.toml"); - let exc_path = test_dir().join("conformance").join("_surface_exclusions.toml"); + let inv_path = test_dir() + .join("conformance") + .join("_surface_inventory.toml"); + let exc_path = test_dir() + .join("conformance") + .join("_surface_exclusions.toml"); let inventory = SurfaceInventory::load(&inv_path); let exclusions = SurfaceExclusions::load(&exc_path); if inventory.items.is_empty() { @@ -39,10 +43,7 @@ fn surface_coverage_gate() { } v }; - let paths: Vec<&Path> = candidate_test_files - .iter() - .map(PathBuf::as_path) - .collect(); + let paths: Vec<&Path> = candidate_test_files.iter().map(PathBuf::as_path).collect(); assert!( !paths.is_empty(), "no .rs test files found under {}", diff --git a/ferrolearn-model-sel/src/dummy.rs b/ferrolearn-model-sel/src/dummy.rs index 0b98b660..6ecb3ea7 100644 --- a/ferrolearn-model-sel/src/dummy.rs +++ b/ferrolearn-model-sel/src/dummy.rs @@ -357,7 +357,7 @@ mod tests { let fitted = clf.fit(&x(), &y).unwrap(); let preds = fitted.predict(&x()).unwrap(); for p in preds.iter() { - assert!(matches!(*p, 0 | 1 | 2)); + assert!(matches!(*p, 0..=2)); } } @@ -368,7 +368,7 @@ mod tests { let fitted = clf.fit(&x(), &y).unwrap(); let preds = fitted.predict(&x()).unwrap(); for p in preds.iter() { - assert!(matches!(*p, 0 | 1 | 2)); + assert!(matches!(*p, 0..=2)); } } diff --git a/ferrolearn-model-sel/src/splitters.rs b/ferrolearn-model-sel/src/splitters.rs index c8d6559f..3ad645bf 100644 --- a/ferrolearn-model-sel/src/splitters.rs +++ 
b/ferrolearn-model-sel/src/splitters.rs @@ -457,7 +457,7 @@ mod tests { assert_eq!(folds.len(), 3); for (train, test) in &folds { assert_eq!(train.len() + test.len(), 8); - assert!(test.len() >= 1); + assert!(!test.is_empty()); } } diff --git a/ferrolearn-model-sel/tests/api_proof.rs b/ferrolearn-model-sel/tests/api_proof.rs index 494bfed8..5f8e949e 100644 --- a/ferrolearn-model-sel/tests/api_proof.rs +++ b/ferrolearn-model-sel/tests/api_proof.rs @@ -233,10 +233,10 @@ fn api_proof_search_estimators() { // ============================================================================= #[test] fn api_proof_meta_estimators() { - let _ovr = OneVsRestClassifier::new(Box::new(|| Pipeline::::new())); - let _ovo = OneVsOneClassifier::new(Box::new(|| Pipeline::::new())); - let _moc = MultiOutputClassifier::new(Box::new(|| Pipeline::::new())); - let _mor = MultiOutputRegressor::new(Box::new(|| Pipeline::::new())); + let _ovr = OneVsRestClassifier::new(Box::new(Pipeline::::new)); + let _ovo = OneVsOneClassifier::new(Box::new(Pipeline::::new)); + let _moc = MultiOutputClassifier::new(Box::new(Pipeline::::new)); + let _mor = MultiOutputRegressor::new(Box::new(Pipeline::::new)); // CalibratedClassifierCV — constructor + method enum let fit_fn: CalibFitFn = Box::new(|_x, _y| { diff --git a/ferrolearn-model-sel/tests/conformance_sklearn.rs b/ferrolearn-model-sel/tests/conformance_sklearn.rs index c28ff21b..7f22570d 100644 --- a/ferrolearn-model-sel/tests/conformance_sklearn.rs +++ b/ferrolearn-model-sel/tests/conformance_sklearn.rs @@ -22,7 +22,10 @@ fn conformance_kfold() { let n_splits = fx.params["n_splits"].as_u64().unwrap() as usize; let shuffle = fx.params["shuffle"].as_bool().unwrap_or(false); - assert!(!shuffle, "fixture must use shuffle=false for deterministic comparison"); + assert!( + !shuffle, + "fixture must use shuffle=false for deterministic comparison" + ); let kf = ferrolearn_model_sel::KFold::new(n_splits); let folds = kf.split(n_samples); diff --git 
a/ferrolearn-model-sel/tests/conformance_surface_coverage.rs b/ferrolearn-model-sel/tests/conformance_surface_coverage.rs index 5efd8f4a..06d78061 100644 --- a/ferrolearn-model-sel/tests/conformance_surface_coverage.rs +++ b/ferrolearn-model-sel/tests/conformance_surface_coverage.rs @@ -5,7 +5,7 @@ //! exclusion OR mentioned by leaf name in at least one test source file. //! Modeled on ferrolearn-linear/tests/conformance_surface_coverage.rs. -use ferrolearn_test_oracle::{assert_surface_covered, SurfaceExclusions, SurfaceInventory}; +use ferrolearn_test_oracle::{SurfaceExclusions, SurfaceInventory, assert_surface_covered}; use std::path::{Path, PathBuf}; fn crate_root() -> PathBuf { @@ -18,8 +18,12 @@ fn test_dir() -> PathBuf { #[test] fn surface_coverage_gate() { - let inv_path = test_dir().join("conformance").join("_surface_inventory.toml"); - let exc_path = test_dir().join("conformance").join("_surface_exclusions.toml"); + let inv_path = test_dir() + .join("conformance") + .join("_surface_inventory.toml"); + let exc_path = test_dir() + .join("conformance") + .join("_surface_exclusions.toml"); let inventory = SurfaceInventory::load(&inv_path); let exclusions = SurfaceExclusions::load(&exc_path); if inventory.items.is_empty() { @@ -39,10 +43,7 @@ fn surface_coverage_gate() { } v }; - let paths: Vec<&Path> = candidate_test_files - .iter() - .map(PathBuf::as_path) - .collect(); + let paths: Vec<&Path> = candidate_test_files.iter().map(PathBuf::as_path).collect(); assert!( !paths.is_empty(), "no .rs test files found under {}", diff --git a/ferrolearn-model-sel/tests/conformance_wave7.rs b/ferrolearn-model-sel/tests/conformance_wave7.rs index 6db966ac..1a65ed0d 100644 --- a/ferrolearn-model-sel/tests/conformance_wave7.rs +++ b/ferrolearn-model-sel/tests/conformance_wave7.rs @@ -18,7 +18,11 @@ fn json_to_usize_vec(value: &serde_json::Value) -> Vec { .collect() } -fn assert_folds_match(actual: &[(Vec, Vec)], expected: &serde_json::Value, label: &str) { +fn 
assert_folds_match( + actual: &[(Vec, Vec)], + expected: &serde_json::Value, + label: &str, +) { let expected_folds = expected.as_array().unwrap(); assert_eq!( actual.len(), @@ -83,11 +87,7 @@ fn conformance_shuffle_split() { assert_eq!(folds.len(), expected_folds.len(), "ShuffleSplit n_splits"); let total_size = (test_size * n as f64).round() as usize; for (i, (_train, test)) in folds.iter().enumerate() { - assert_eq!( - test.len(), - total_size, - "ShuffleSplit fold {i} test size" - ); + assert_eq!(test.len(), total_size, "ShuffleSplit fold {i} test size"); } } @@ -97,7 +97,9 @@ fn conformance_group_kfold() { let groups_vec = json_to_usize_vec(&fx.input["groups"]); let groups = ndarray::Array1::from_vec(groups_vec); let n_splits = fx.params["n_splits"].as_u64().unwrap() as usize; - let folds = GroupKFold::new(n_splits).split(&groups).expect("GroupKFold split"); + let folds = GroupKFold::new(n_splits) + .split(&groups) + .expect("GroupKFold split"); // GroupKFold assignment depends on group-size ordering — both libraries // should produce the same partition up to fold permutation. 
let expected_folds = fx.expected["folds"].as_array().unwrap(); @@ -127,7 +129,9 @@ fn conformance_group_shuffle_split() { let groups = ndarray::Array1::from_vec(groups_vec); let n_splits = fx.params["n_splits"].as_u64().unwrap() as usize; let test_size = fx.params["test_size"].as_f64().unwrap(); - let folds = GroupShuffleSplit::new(n_splits, test_size).split(&groups).expect("GSS split"); + let folds = GroupShuffleSplit::new(n_splits, test_size) + .split(&groups) + .expect("GSS split"); let expected_folds = fx.expected["folds"].as_array().unwrap(); assert_eq!(folds.len(), expected_folds.len(), "GSS n_splits"); for (_train, test) in folds.iter() { diff --git a/ferrolearn-neighbors/src/local_outlier_factor.rs b/ferrolearn-neighbors/src/local_outlier_factor.rs index edaa6c0e..d95a89be 100644 --- a/ferrolearn-neighbors/src/local_outlier_factor.rs +++ b/ferrolearn-neighbors/src/local_outlier_factor.rs @@ -386,8 +386,7 @@ impl FittedLocalOutlierFactor { } // Compute LOF for new data against the training set. 
- let train_data: Vec> = - (0..n_train).map(|i| self.x_train.row(i).to_vec()).collect(); + let train_data: Vec> = (0..n_train).map(|i| self.x_train.row(i).to_vec()).collect(); let effective_k = self.n_neighbors.min(n_train); let eps = F::from(1e-15).unwrap(); diff --git a/ferrolearn-neighbors/tests/api_proof.rs b/ferrolearn-neighbors/tests/api_proof.rs index 40a2536c..b9455e62 100644 --- a/ferrolearn-neighbors/tests/api_proof.rs +++ b/ferrolearn-neighbors/tests/api_proof.rs @@ -72,7 +72,9 @@ fn api_proof_kneighbors_classifier() { let (dists, idxs) = f.kneighbors(&x, None).unwrap(); assert_eq!(dists.dim(), (6, 3)); assert_eq!(idxs.dim(), (6, 3)); - let g = f.kneighbors_graph(&x, None, GraphMode::Connectivity).unwrap(); + let g = f + .kneighbors_graph(&x, None, GraphMode::Connectivity) + .unwrap(); assert_eq!(g.n_rows(), 6); assert_eq!(g.n_cols(), 6); assert_eq!(g.nnz(), 6 * 3); @@ -86,7 +88,12 @@ fn api_proof_kneighbors_classifier() { // Default impl + every Algorithm and Weights variant compiles. 
let _: KNeighborsClassifier = Default::default(); - for alg in [Algorithm::Auto, Algorithm::BruteForce, Algorithm::KdTree, Algorithm::BallTree] { + for alg in [ + Algorithm::Auto, + Algorithm::BruteForce, + Algorithm::KdTree, + Algorithm::BallTree, + ] { for w in [Weights::Uniform, Weights::Distance] { let _ = KNeighborsClassifier::::new() .with_algorithm(alg) @@ -119,7 +126,9 @@ fn api_proof_kneighbors_regressor() { let (dists, idxs) = f.kneighbors(&x, Some(2)).unwrap(); assert_eq!(dists.dim(), (6, 2)); assert_eq!(idxs.dim(), (6, 2)); - let g = f.kneighbors_graph(&x, Some(2), GraphMode::Distance).unwrap(); + let g = f + .kneighbors_graph(&x, Some(2), GraphMode::Distance) + .unwrap(); assert_eq!(g.nnz(), 6 * 2); assert_eq!(f.n_samples_fit(), 6); @@ -217,7 +226,9 @@ fn api_proof_nearest_neighbors() { let pairs = nn.radius_neighbors(&x, 1.0).unwrap(); assert_eq!(pairs.len(), 6); - let g = nn.kneighbors_graph(&x, None, GraphMode::Connectivity).unwrap(); + let g = nn + .kneighbors_graph(&x, None, GraphMode::Connectivity) + .unwrap(); assert_eq!(g.n_rows(), 6); assert_eq!(g.n_cols(), 6); let g2 = nn @@ -366,7 +377,9 @@ fn api_proof_graph_free_functions() { fn api_proof_f32_compiles() { let x32 = Array2::from_shape_vec( (6, 2), - vec![0.0f32, 0.0, 0.5, 0.0, 0.0, 0.5, 5.0, 5.0, 5.5, 5.0, 5.0, 5.5], + vec![ + 0.0f32, 0.0, 0.5, 0.0, 0.0, 0.5, 5.0, 5.0, 5.5, 5.0, 5.0, 5.5, + ], ) .unwrap(); let y_cls = array![0usize, 0, 0, 1, 1, 1]; diff --git a/ferrolearn-neighbors/tests/conformance_sklearn.rs b/ferrolearn-neighbors/tests/conformance_sklearn.rs index 1e830d36..aaac9728 100644 --- a/ferrolearn-neighbors/tests/conformance_sklearn.rs +++ b/ferrolearn-neighbors/tests/conformance_sklearn.rs @@ -46,11 +46,7 @@ fn conformance_kneighbors_classifier() { assert_labels_equal(&preds_i64, &expected_preds, "KNeighborsClassifier.predict"); // Accuracy is a single scalar — match within metric tolerance. 
- let n_correct = preds - .iter() - .zip(y.iter()) - .filter(|(a, b)| a == b) - .count(); + let n_correct = preds.iter().zip(y.iter()).filter(|(a, b)| a == b).count(); let accuracy = n_correct as f64 / y.len() as f64; assert_close( accuracy, diff --git a/ferrolearn-neighbors/tests/conformance_surface_coverage.rs b/ferrolearn-neighbors/tests/conformance_surface_coverage.rs index e0382c23..df32c767 100644 --- a/ferrolearn-neighbors/tests/conformance_surface_coverage.rs +++ b/ferrolearn-neighbors/tests/conformance_surface_coverage.rs @@ -5,7 +5,7 @@ //! exclusion OR mentioned by leaf name in at least one test source file. //! Modeled on ferrolearn-linear/tests/conformance_surface_coverage.rs. -use ferrolearn_test_oracle::{assert_surface_covered, SurfaceExclusions, SurfaceInventory}; +use ferrolearn_test_oracle::{SurfaceExclusions, SurfaceInventory, assert_surface_covered}; use std::path::{Path, PathBuf}; fn crate_root() -> PathBuf { @@ -18,8 +18,12 @@ fn test_dir() -> PathBuf { #[test] fn surface_coverage_gate() { - let inv_path = test_dir().join("conformance").join("_surface_inventory.toml"); - let exc_path = test_dir().join("conformance").join("_surface_exclusions.toml"); + let inv_path = test_dir() + .join("conformance") + .join("_surface_inventory.toml"); + let exc_path = test_dir() + .join("conformance") + .join("_surface_exclusions.toml"); let inventory = SurfaceInventory::load(&inv_path); let exclusions = SurfaceExclusions::load(&exc_path); if inventory.items.is_empty() { @@ -39,10 +43,7 @@ fn surface_coverage_gate() { } v }; - let paths: Vec<&Path> = candidate_test_files - .iter() - .map(PathBuf::as_path) - .collect(); + let paths: Vec<&Path> = candidate_test_files.iter().map(PathBuf::as_path).collect(); assert!( !paths.is_empty(), "no .rs test files found under {}", diff --git a/ferrolearn-neighbors/tests/conformance_wave4.rs b/ferrolearn-neighbors/tests/conformance_wave4.rs index da2271ff..a68e7cd8 100644 --- a/ferrolearn-neighbors/tests/conformance_wave4.rs 
+++ b/ferrolearn-neighbors/tests/conformance_wave4.rs @@ -28,7 +28,11 @@ fn conformance_nearest_centroid() { .iter() .map(|v| v.as_u64().unwrap() as usize) .collect(); - let matches = preds.iter().zip(expected.iter()).filter(|(a, e)| a == e).count(); + let matches = preds + .iter() + .zip(expected.iter()) + .filter(|(a, e)| a == e) + .count(); let acc = matches as f64 / preds.len() as f64; assert!( acc >= 0.95, @@ -49,25 +53,18 @@ fn conformance_nearest_neighbors() { let (dists, idxs) = fitted .kneighbors(&query, Some(n_neighbors)) .expect("kneighbors"); - let expected_dists = fx.expected["distances"] - .as_array() - .unwrap(); - assert_eq!( - dists.nrows(), - expected_dists.len(), - "kneighbors dists rows" - ); + let expected_dists = fx.expected["distances"].as_array().unwrap(); + assert_eq!(dists.nrows(), expected_dists.len(), "kneighbors dists rows"); assert_eq!(dists.ncols(), n_neighbors, "kneighbors dists cols"); - assert_eq!( - idxs.nrows(), - expected_dists.len(), - "kneighbors idxs rows" - ); + assert_eq!(idxs.nrows(), expected_dists.len(), "kneighbors idxs rows"); // Distance to closest neighbor (self) should be ~0; verify monotone increasing. 
for row in dists.rows() { let mut prev = -1.0; for &d in row.iter() { - assert!(d >= prev - 1e-9, "kneighbors dists not monotone: {d} after {prev}"); + assert!( + d >= prev - 1e-9, + "kneighbors dists not monotone: {d} after {prev}" + ); prev = d; } } @@ -83,21 +80,20 @@ fn conformance_local_outlier_factor() { let model = LocalOutlierFactor::::new() .with_n_neighbors(n_neighbors) .with_contamination(contamination); - let preds = model - .fit_predict(&x) - .expect("LOF fit_predict"); + let preds = model.fit_predict(&x).expect("LOF fit_predict"); let expected: Vec = fx.expected["predictions"] .as_array() .unwrap() .iter() .map(|v| v.as_i64().unwrap()) .collect(); - let matches = preds.iter().zip(expected.iter()).filter(|&(&a, &e)| a as i64 == e).count(); + let matches = preds + .iter() + .zip(expected.iter()) + .filter(|&(&a, &e)| a as i64 == e) + .count(); let frac = matches as f64 / preds.len() as f64; - assert!( - frac >= 0.80, - "LOF +1/-1 agreement {frac:.4} < 0.80 floor" - ); + assert!(frac >= 0.80, "LOF +1/-1 agreement {frac:.4} < 0.80 floor"); } #[test] @@ -122,7 +118,11 @@ fn conformance_radius_neighbors_classifier() { .iter() .map(|v| v.as_u64().unwrap() as usize) .collect(); - let matches = preds.iter().zip(expected.iter()).filter(|(a, e)| a == e).count(); + let matches = preds + .iter() + .zip(expected.iter()) + .filter(|(a, e)| a == e) + .count(); let acc = matches as f64 / preds.len() as f64; assert!( acc >= 0.90, @@ -143,7 +143,11 @@ fn conformance_radius_neighbors_regressor() { let y_mean = y.iter().sum::() / y.len() as f64; let ss_tot: f64 = y.iter().map(|v| (v - y_mean).powi(2)).sum(); - let ss_res: f64 = preds.iter().zip(y.iter()).map(|(a, e)| (a - e).powi(2)).sum(); + let ss_res: f64 = preds + .iter() + .zip(y.iter()) + .map(|(a, e)| (a - e).powi(2)) + .sum(); let r2 = 1.0 - ss_res / ss_tot; let expected_r2 = fx.expected["r2"].as_f64().unwrap_or(0.5); assert!( diff --git a/ferrolearn-neural/src/mlp.rs b/ferrolearn-neural/src/mlp.rs index 
732dafc5..a5ae46f9 100644 --- a/ferrolearn-neural/src/mlp.rs +++ b/ferrolearn-neural/src/mlp.rs @@ -1565,7 +1565,9 @@ mod tests { #[test] fn test_activate_identity() { - let mut z = Array2::from_shape_vec((1, 2), vec![3.14, -2.71]).unwrap(); + // Arbitrary nonzero test values — clippy's approx_constant lint + // false-positives on `3.14` thinking we meant `f32::consts::PI`. + let mut z = Array2::from_shape_vec((1, 2), vec![1.25, -2.71]).unwrap(); let original = z.clone(); activate_inplace(&mut z, Activation::Identity); assert_relative_eq!(z[[0, 0]], original[[0, 0]], epsilon = 1e-15); diff --git a/ferrolearn-neural/tests/conformance_sklearn.rs b/ferrolearn-neural/tests/conformance_sklearn.rs index b2f9e8ec..b0a5e8fd 100644 --- a/ferrolearn-neural/tests/conformance_sklearn.rs +++ b/ferrolearn-neural/tests/conformance_sklearn.rs @@ -38,11 +38,7 @@ fn conformance_mlp_classifier() { let preds = fitted.predict(&x).expect("MLPClassifier predict"); let expected_acc = fx.expected["accuracy"].as_f64().unwrap_or(0.5); - let matches: usize = preds - .iter() - .zip(y.iter()) - .filter(|(a, e)| a == e) - .count(); + let matches: usize = preds.iter().zip(y.iter()).filter(|(a, e)| a == e).count(); let acc = matches as f64 / y.len() as f64; // sklearn at Adam defaults also doesn't fully converge on this n=100, // p=5, 3-class problem; both libraries achieve similar mediocre Adam diff --git a/ferrolearn-neural/tests/conformance_surface_coverage.rs b/ferrolearn-neural/tests/conformance_surface_coverage.rs index 7807c12a..713c9844 100644 --- a/ferrolearn-neural/tests/conformance_surface_coverage.rs +++ b/ferrolearn-neural/tests/conformance_surface_coverage.rs @@ -5,7 +5,7 @@ //! exclusion OR mentioned by leaf name in at least one test source file. //! Modeled on ferrolearn-linear/tests/conformance_surface_coverage.rs. 
-use ferrolearn_test_oracle::{assert_surface_covered, SurfaceExclusions, SurfaceInventory}; +use ferrolearn_test_oracle::{SurfaceExclusions, SurfaceInventory, assert_surface_covered}; use std::path::{Path, PathBuf}; fn crate_root() -> PathBuf { @@ -18,8 +18,12 @@ fn test_dir() -> PathBuf { #[test] fn surface_coverage_gate() { - let inv_path = test_dir().join("conformance").join("_surface_inventory.toml"); - let exc_path = test_dir().join("conformance").join("_surface_exclusions.toml"); + let inv_path = test_dir() + .join("conformance") + .join("_surface_inventory.toml"); + let exc_path = test_dir() + .join("conformance") + .join("_surface_exclusions.toml"); let inventory = SurfaceInventory::load(&inv_path); let exclusions = SurfaceExclusions::load(&exc_path); if inventory.items.is_empty() { @@ -39,10 +43,7 @@ fn surface_coverage_gate() { } v }; - let paths: Vec<&Path> = candidate_test_files - .iter() - .map(PathBuf::as_path) - .collect(); + let paths: Vec<&Path> = candidate_test_files.iter().map(PathBuf::as_path).collect(); assert!( !paths.is_empty(), "no .rs test files found under {}", diff --git a/ferrolearn-neural/tests/conformance_wave4.rs b/ferrolearn-neural/tests/conformance_wave4.rs index 28b17b0e..46513b72 100644 --- a/ferrolearn-neural/tests/conformance_wave4.rs +++ b/ferrolearn-neural/tests/conformance_wave4.rs @@ -2,7 +2,7 @@ //! //! MLPRegressor + BernoulliRBM. Stochastic models — we use accuracy/R² floors. -use ferrolearn_core::{Fit, Predict, Transform}; +use ferrolearn_core::{Fit, Predict}; use ferrolearn_neural::{Activation, BernoulliRBM, MLPRegressor, Solver}; use ferrolearn_test_oracle::{json_to_array1, json_to_array2, load_fixture}; @@ -39,7 +39,11 @@ fn conformance_mlp_regressor() { // convergence on this fixture, not by a library-specific bug. 
let y_mean = y.iter().sum::() / y.len() as f64; let ss_tot: f64 = y.iter().map(|v| (v - y_mean).powi(2)).sum(); - let ss_res: f64 = preds.iter().zip(y.iter()).map(|(a, e)| (a - e).powi(2)).sum(); + let ss_res: f64 = preds + .iter() + .zip(y.iter()) + .map(|(a, e)| (a - e).powi(2)) + .sum(); let r2 = 1.0 - ss_res / ss_tot; let expected_r2 = fx.expected["r2"].as_f64().unwrap_or(0.5); // Ferrolearn R² must be within 0.15 absolute of sklearn's R² diff --git a/ferrolearn-numerical/src/optimize.rs b/ferrolearn-numerical/src/optimize.rs index 37d49f67..98d937ad 100644 --- a/ferrolearn-numerical/src/optimize.rs +++ b/ferrolearn-numerical/src/optimize.rs @@ -764,8 +764,7 @@ impl Powell { if f_extrap < fx_start { let two_term = 2.0 * (fx_start - 2.0 * fx + f_extrap); let lhs = fx_start - fx - largest_decrease; - let test = - two_term * lhs * lhs - largest_decrease * (fx_start - f_extrap).powi(2); + let test = two_term * lhs * lhs - largest_decrease * (fx_start - f_extrap).powi(2); if test < 0.0 { let (alpha, f_after) = line_minimise_powell(&mut f, &x, &delta, self); for (xk, dk) in x.iter_mut().zip(delta.iter()) { diff --git a/ferrolearn-numerical/src/special.rs b/ferrolearn-numerical/src/special.rs index d083bf23..6eb5c156 100644 --- a/ferrolearn-numerical/src/special.rs +++ b/ferrolearn-numerical/src/special.rs @@ -218,11 +218,11 @@ mod tests { #[test] fn digamma_known_values() { // ψ(1) = -γ ≈ -0.577_215_664_901_532_86 - assert!(approx(digamma(1.0), -0.577_215_664_901_532_86, 1e-9)); + assert!(approx(digamma(1.0), -0.577_215_664_901_532_9, 1e-9)); // ψ(2) = 1 - γ - assert!(approx(digamma(2.0), 1.0 - 0.577_215_664_901_532_86, 1e-9)); + assert!(approx(digamma(2.0), 1.0 - 0.577_215_664_901_532_9, 1e-9)); // ψ(0.5) = -γ - 2 ln 2 - let expected = -0.577_215_664_901_532_86 - 2.0 * 2.0_f64.ln(); + let expected = -0.577_215_664_901_532_9 - 2.0 * 2.0_f64.ln(); assert!(approx(digamma(0.5), expected, 1e-9)); } diff --git a/ferrolearn-numerical/tests/conformance_sklearn.rs 
b/ferrolearn-numerical/tests/conformance_sklearn.rs index 602f1eb6..fd433485 100644 --- a/ferrolearn-numerical/tests/conformance_sklearn.rs +++ b/ferrolearn-numerical/tests/conformance_sklearn.rs @@ -10,8 +10,8 @@ use ferrolearn_numerical::distributions::{ use ferrolearn_numerical::interpolate::{BoundaryCondition, CubicSpline}; use ferrolearn_numerical::sparse_eig::{LanczosSolver, WhichEigenvalues}; use ferrolearn_test_oracle::{ - assert_close, assert_close_slice, load_fixture, parse_f64_value, TOL_NUMERICAL_ABS, - TOL_NUMERICAL_REL, + TOL_NUMERICAL_ABS, TOL_NUMERICAL_REL, assert_close, assert_close_slice, load_fixture, + parse_f64_value, }; fn json_f64_vec(v: &serde_json::Value) -> Vec { @@ -99,8 +99,20 @@ fn conformance_distributions() { let pdf: Vec = points.iter().map(|&x| dist.pdf(x)).collect(); let cdf: Vec = points.iter().map(|&x| dist.cdf(x)).collect(); let sf: Vec = points.iter().map(|&x| dist.sf(x)).collect(); - assert_close_slice(&pdf, &json_f64_vec(&block["pdf"]), rel, abs, "ChiSquared.pdf"); - assert_close_slice(&cdf, &json_f64_vec(&block["cdf"]), rel, abs, "ChiSquared.cdf"); + assert_close_slice( + &pdf, + &json_f64_vec(&block["pdf"]), + rel, + abs, + "ChiSquared.pdf", + ); + assert_close_slice( + &cdf, + &json_f64_vec(&block["cdf"]), + rel, + abs, + "ChiSquared.cdf", + ); assert_close_slice(&sf, &json_f64_vec(&block["sf"]), rel, abs, "ChiSquared.sf"); } @@ -128,8 +140,20 @@ fn conformance_distributions() { let pdf: Vec = points.iter().map(|&x| dist.pdf(x)).collect(); let cdf: Vec = points.iter().map(|&x| dist.cdf(x)).collect(); let sf: Vec = points.iter().map(|&x| dist.sf(x)).collect(); - assert_close_slice(&pdf, &json_f64_vec(&block["pdf"]), rel, abs, "StudentsT.pdf"); - assert_close_slice(&cdf, &json_f64_vec(&block["cdf"]), rel, abs, "StudentsT.cdf"); + assert_close_slice( + &pdf, + &json_f64_vec(&block["pdf"]), + rel, + abs, + "StudentsT.pdf", + ); + assert_close_slice( + &cdf, + &json_f64_vec(&block["cdf"]), + rel, + abs, + "StudentsT.cdf", + ); 
assert_close_slice(&sf, &json_f64_vec(&block["sf"]), rel, abs, "StudentsT.sf"); } @@ -178,7 +202,7 @@ fn conformance_sparse_eigsh() { .as_array() .unwrap() .iter() - .map(|row| json_f64_vec(row)) + .map(json_f64_vec) .collect(); assert_eq!(matrix_rows.len(), n, "matrix must be n x n"); diff --git a/ferrolearn-preprocess/src/feature_scoring.rs b/ferrolearn-preprocess/src/feature_scoring.rs index fc970afc..d4bc790b 100644 --- a/ferrolearn-preprocess/src/feature_scoring.rs +++ b/ferrolearn-preprocess/src/feature_scoring.rs @@ -760,7 +760,7 @@ mod tests { let y: Array1 = array![0, 0, 0, 1, 1, 1]; let (_, p_vals) = f_classif(&x, &y).unwrap(); for &p in p_vals.iter() { - assert!(p >= 0.0 && p <= 1.0, "p-value {p} out of bounds"); + assert!((0.0..=1.0).contains(&p), "p-value {p} out of bounds"); } } @@ -812,7 +812,7 @@ mod tests { let y: Array1 = array![1.0, 2.0, 3.0, 4.0, 5.0]; let (_, p_vals) = f_regression(&x, &y).unwrap(); for &p in p_vals.iter() { - assert!(p >= 0.0 && p <= 1.0, "p-value {p} out of bounds"); + assert!((0.0..=1.0).contains(&p), "p-value {p} out of bounds"); } } @@ -893,7 +893,7 @@ mod tests { let y: Array1 = array![0, 1, 0, 1, 0, 1]; let (_, p_vals) = chi2(&x, &y).unwrap(); for &p in p_vals.iter() { - assert!(p >= 0.0 && p <= 1.0, "p-value {p} out of bounds"); + assert!((0.0..=1.0).contains(&p), "p-value {p} out of bounds"); } } diff --git a/ferrolearn-preprocess/src/ordinal_encoder.rs b/ferrolearn-preprocess/src/ordinal_encoder.rs index 2aed1cc8..1c08a96a 100644 --- a/ferrolearn-preprocess/src/ordinal_encoder.rs +++ b/ferrolearn-preprocess/src/ordinal_encoder.rs @@ -120,8 +120,7 @@ impl Fit, ()> for OrdinalEncoder { // documents `categories_ = sorted(unique(X[:, j]))`. (Older // ferrolearn versions used first-seen order — #344.) 
let mut unique: Vec = Vec::new(); - let mut seen_set: std::collections::HashSet = - std::collections::HashSet::new(); + let mut seen_set: std::collections::HashSet = std::collections::HashSet::new(); for i in 0..n_samples { let cat = &x[[i, j]]; if seen_set.insert(cat.clone()) { diff --git a/ferrolearn-preprocess/tests/api_proof.rs b/ferrolearn-preprocess/tests/api_proof.rs index f4e8f1ac..8eb3cf02 100644 --- a/ferrolearn-preprocess/tests/api_proof.rs +++ b/ferrolearn-preprocess/tests/api_proof.rs @@ -273,7 +273,7 @@ fn api_proof_text() { let f = CountVectorizer::new().fit(&docs).unwrap(); let counts = f.transform(&docs).unwrap(); assert_eq!(counts.nrows(), 3); - let counts_f64 = counts.mapv(|v| v as f64); + let counts_f64 = counts.mapv(|v| v); let f = TfidfTransformer::::new().fit(&counts_f64).unwrap(); let _ = f.transform(&counts_f64).unwrap(); } diff --git a/ferrolearn-preprocess/tests/conformance_surface_coverage.rs b/ferrolearn-preprocess/tests/conformance_surface_coverage.rs index 3881a29a..13c68e81 100644 --- a/ferrolearn-preprocess/tests/conformance_surface_coverage.rs +++ b/ferrolearn-preprocess/tests/conformance_surface_coverage.rs @@ -5,7 +5,7 @@ //! exclusion OR mentioned by leaf name in at least one test source file. //! Modeled on ferrolearn-linear/tests/conformance_surface_coverage.rs. 
-use ferrolearn_test_oracle::{assert_surface_covered, SurfaceExclusions, SurfaceInventory}; +use ferrolearn_test_oracle::{SurfaceExclusions, SurfaceInventory, assert_surface_covered}; use std::path::{Path, PathBuf}; fn crate_root() -> PathBuf { @@ -18,8 +18,12 @@ fn test_dir() -> PathBuf { #[test] fn surface_coverage_gate() { - let inv_path = test_dir().join("conformance").join("_surface_inventory.toml"); - let exc_path = test_dir().join("conformance").join("_surface_exclusions.toml"); + let inv_path = test_dir() + .join("conformance") + .join("_surface_inventory.toml"); + let exc_path = test_dir() + .join("conformance") + .join("_surface_exclusions.toml"); let inventory = SurfaceInventory::load(&inv_path); let exclusions = SurfaceExclusions::load(&exc_path); if inventory.items.is_empty() { @@ -39,10 +43,7 @@ fn surface_coverage_gate() { } v }; - let paths: Vec<&Path> = candidate_test_files - .iter() - .map(PathBuf::as_path) - .collect(); + let paths: Vec<&Path> = candidate_test_files.iter().map(PathBuf::as_path).collect(); assert!( !paths.is_empty(), "no .rs test files found under {}", diff --git a/ferrolearn-preprocess/tests/conformance_wave6.rs b/ferrolearn-preprocess/tests/conformance_wave6.rs index 3e421389..184e31b9 100644 --- a/ferrolearn-preprocess/tests/conformance_wave6.rs +++ b/ferrolearn-preprocess/tests/conformance_wave6.rs @@ -8,11 +8,11 @@ use ferrolearn_core::{Fit, Transform}; use ferrolearn_preprocess::{ + FunctionTransformer, GaussianRandomProjection, LabelBinarizer, MultiLabelBinarizer, + OrdinalEncoder, SelectPercentile, SparseRandomProjection, VarianceThreshold, feature_selection::{ScoreFunc, SelectFromModel, SelectKBest}, knn_imputer::{KNNImputer, KNNWeights}, spline_transformer::{KnotStrategy, SplineTransformer}, - FunctionTransformer, GaussianRandomProjection, LabelBinarizer, MultiLabelBinarizer, - OrdinalEncoder, SelectPercentile, SparseRandomProjection, VarianceThreshold, }; use ferrolearn_test_oracle::{json_to_array1, json_to_array2, 
load_fixture}; @@ -99,10 +99,7 @@ fn conformance_multilabel_binarizer() { let expected = json_to_array2(&fx.expected["transformed"]); assert_eq!(yt.shape(), expected.shape(), "MLB shape"); for (i, (&a, &e)) in yt.iter().zip(expected.iter()).enumerate() { - assert!( - (a - e).abs() < 1e-12, - "MLB[{i}] actual={a} expected={e}" - ); + assert!((a - e).abs() < 1e-12, "MLB[{i}] actual={a} expected={e}"); } } @@ -118,10 +115,7 @@ fn conformance_variance_threshold() { let expected = json_to_array2(&fx.expected["transformed"]); assert_eq!(xt.shape(), expected.shape(), "VT shape"); for (i, (&a, &e)) in xt.iter().zip(expected.iter()).enumerate() { - assert!( - (a - e).abs() < 1e-12, - "VT[{i}] actual={a} expected={e}" - ); + assert!((a - e).abs() < 1e-12, "VT[{i}] actual={a} expected={e}"); } } @@ -183,8 +177,8 @@ fn conformance_select_from_model() { let c = fx.params["C"].as_f64().unwrap_or(1.0); let max_iter = fx.params["max_iter"].as_u64().unwrap_or(500) as usize; - use ferrolearn_core::introspection::HasCoefficients; use ferrolearn_core::Fit; + use ferrolearn_core::introspection::HasCoefficients; let lr = ferrolearn_linear::LogisticRegression::::new() .with_c(c) .with_max_iter(max_iter); @@ -232,8 +226,8 @@ fn conformance_rfe() { let y = ndarray::Array1::from_vec(y_vec); let n_keep = fx.params["n_features_to_select"].as_u64().unwrap_or(4) as usize; - use ferrolearn_core::introspection::HasCoefficients; use ferrolearn_core::Fit; + use ferrolearn_core::introspection::HasCoefficients; let lr = ferrolearn_linear::LogisticRegression::::new(); let fitted = lr.fit(&x, &y).expect("LogisticRegression fit"); let importances: ndarray::Array1 = fitted.coefficients().mapv(|v| v.abs()); @@ -365,10 +359,7 @@ fn conformance_function_transformer() { let xt = model.transform(&x).expect("FunctionTransformer transform"); let expected = json_to_array2(&fx.expected["transformed"]); for (i, (&a, &e)) in xt.iter().zip(expected.iter()).enumerate() { - assert!( - (a - e).abs() < 1e-12, - "FT[{i}] 
actual={a} expected={e}" - ); + assert!((a - e).abs() < 1e-12, "FT[{i}] actual={a} expected={e}"); } // silence unused warning let _ = json_to_array1; diff --git a/ferrolearn-python/src/extras.rs b/ferrolearn-python/src/extras.rs index 0069e698..cbe09330 100644 --- a/ferrolearn-python/src/extras.rs +++ b/ferrolearn-python/src/extras.rs @@ -227,9 +227,21 @@ impl RsRandomForestRegressor { #[new] #[pyo3(signature = (n_estimators=100, max_depth=None, min_samples_split=2, min_samples_leaf=1, random_state=None))] - fn new(n_estimators: usize, max_depth: Option, min_samples_split: usize, - min_samples_leaf: usize, random_state: Option) -> Self { - Self { n_estimators, max_depth, min_samples_split, min_samples_leaf, random_state, fitted: None } + fn new( + n_estimators: usize, + max_depth: Option, + min_samples_split: usize, + min_samples_leaf: usize, + random_state: Option, + ) -> Self { + Self { + n_estimators, + max_depth, + min_samples_split, + min_samples_leaf, + random_state, + fitted: None, + } } fn fit(&mut self, x: PyReadonlyArray2<'_, f64>, y: PyReadonlyArray1<'_, f64>) -> PyResult<()> { @@ -240,18 +252,28 @@ impl RsRandomForestRegressor { .with_max_depth(self.max_depth) .with_min_samples_split(self.min_samples_split) .with_min_samples_leaf(self.min_samples_leaf); - if let Some(s) = self.random_state { m = m.with_random_state(s); } - let fitted = m.fit(&x_nd, &y_nd) + if let Some(s) = self.random_state { + m = m.with_random_state(s); + } + let fitted = m + .fit(&x_nd, &y_nd) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; self.fitted = Some(fitted); Ok(()) } - fn predict<'py>(&self, py: Python<'py>, x: PyReadonlyArray2<'_, f64>) -> PyResult>> { - let f = self.fitted.as_ref() + fn predict<'py>( + &self, + py: Python<'py>, + x: PyReadonlyArray2<'_, f64>, + ) -> PyResult>> { + let f = self + .fitted + .as_ref() .ok_or_else(|| pyo3::exceptions::PyRuntimeError::new_err("not fitted"))?; let x_nd = numpy2_to_ndarray(x); - let preds = 
f.predict(&x_nd) + let preds = f + .predict(&x_nd) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; Ok(ndarray1_to_numpy(py, &preds)) } @@ -270,7 +292,12 @@ impl RsExtraTreesRegressor { #[new] #[pyo3(signature = (n_estimators=100, max_depth=None, random_state=None))] fn new(n_estimators: usize, max_depth: Option, random_state: Option) -> Self { - Self { n_estimators, max_depth, random_state, fitted: None } + Self { + n_estimators, + max_depth, + random_state, + fitted: None, + } } fn fit(&mut self, x: PyReadonlyArray2<'_, f64>, y: PyReadonlyArray1<'_, f64>) -> PyResult<()> { @@ -279,18 +306,28 @@ impl RsExtraTreesRegressor { let mut m = ferrolearn_tree::ExtraTreesRegressor::::new() .with_n_estimators(self.n_estimators) .with_max_depth(self.max_depth); - if let Some(s) = self.random_state { m = m.with_random_state(s); } - let fitted = m.fit(&x_nd, &y_nd) + if let Some(s) = self.random_state { + m = m.with_random_state(s); + } + let fitted = m + .fit(&x_nd, &y_nd) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; self.fitted = Some(fitted); Ok(()) } - fn predict<'py>(&self, py: Python<'py>, x: PyReadonlyArray2<'_, f64>) -> PyResult>> { - let f = self.fitted.as_ref() + fn predict<'py>( + &self, + py: Python<'py>, + x: PyReadonlyArray2<'_, f64>, + ) -> PyResult>> { + let f = self + .fitted + .as_ref() .ok_or_else(|| pyo3::exceptions::PyRuntimeError::new_err("not fitted"))?; let x_nd = numpy2_to_ndarray(x); - let preds = f.predict(&x_nd) + let preds = f + .predict(&x_nd) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; Ok(ndarray1_to_numpy(py, &preds)) } @@ -309,8 +346,19 @@ pub struct RsGradientBoostingRegressor { impl RsGradientBoostingRegressor { #[new] #[pyo3(signature = (n_estimators=100, learning_rate=0.1, max_depth=Some(3), random_state=None))] - fn new(n_estimators: usize, learning_rate: f64, max_depth: Option, random_state: Option) -> Self { - Self { n_estimators, learning_rate, max_depth, 
random_state, fitted: None } + fn new( + n_estimators: usize, + learning_rate: f64, + max_depth: Option, + random_state: Option, + ) -> Self { + Self { + n_estimators, + learning_rate, + max_depth, + random_state, + fitted: None, + } } fn fit(&mut self, x: PyReadonlyArray2<'_, f64>, y: PyReadonlyArray1<'_, f64>) -> PyResult<()> { @@ -320,18 +368,28 @@ impl RsGradientBoostingRegressor { .with_n_estimators(self.n_estimators) .with_learning_rate(self.learning_rate) .with_max_depth(self.max_depth); - if let Some(s) = self.random_state { m = m.with_random_state(s); } - let fitted = m.fit(&x_nd, &y_nd) + if let Some(s) = self.random_state { + m = m.with_random_state(s); + } + let fitted = m + .fit(&x_nd, &y_nd) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; self.fitted = Some(fitted); Ok(()) } - fn predict<'py>(&self, py: Python<'py>, x: PyReadonlyArray2<'_, f64>) -> PyResult>> { - let f = self.fitted.as_ref() + fn predict<'py>( + &self, + py: Python<'py>, + x: PyReadonlyArray2<'_, f64>, + ) -> PyResult>> { + let f = self + .fitted + .as_ref() .ok_or_else(|| pyo3::exceptions::PyRuntimeError::new_err("not fitted"))?; let x_nd = numpy2_to_ndarray(x); - let preds = f.predict(&x_nd) + let preds = f + .predict(&x_nd) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; Ok(ndarray1_to_numpy(py, &preds)) } @@ -350,8 +408,19 @@ pub struct RsHistGradientBoostingRegressor { impl RsHistGradientBoostingRegressor { #[new] #[pyo3(signature = (n_estimators=100, learning_rate=0.1, max_depth=None, random_state=None))] - fn new(n_estimators: usize, learning_rate: f64, max_depth: Option, random_state: Option) -> Self { - Self { n_estimators, learning_rate, max_depth, random_state, fitted: None } + fn new( + n_estimators: usize, + learning_rate: f64, + max_depth: Option, + random_state: Option, + ) -> Self { + Self { + n_estimators, + learning_rate, + max_depth, + random_state, + fitted: None, + } } fn fit(&mut self, x: PyReadonlyArray2<'_, f64>, y: 
PyReadonlyArray1<'_, f64>) -> PyResult<()> { @@ -361,18 +430,28 @@ impl RsHistGradientBoostingRegressor { .with_n_estimators(self.n_estimators) .with_learning_rate(self.learning_rate) .with_max_depth(self.max_depth); - if let Some(s) = self.random_state { m = m.with_random_state(s); } - let fitted = m.fit(&x_nd, &y_nd) + if let Some(s) = self.random_state { + m = m.with_random_state(s); + } + let fitted = m + .fit(&x_nd, &y_nd) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; self.fitted = Some(fitted); Ok(()) } - fn predict<'py>(&self, py: Python<'py>, x: PyReadonlyArray2<'_, f64>) -> PyResult>> { - let f = self.fitted.as_ref() + fn predict<'py>( + &self, + py: Python<'py>, + x: PyReadonlyArray2<'_, f64>, + ) -> PyResult>> { + let f = self + .fitted + .as_ref() .ok_or_else(|| pyo3::exceptions::PyRuntimeError::new_err("not fitted"))?; let x_nd = numpy2_to_ndarray(x); - let preds = f.predict(&x_nd) + let preds = f + .predict(&x_nd) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; Ok(ndarray1_to_numpy(py, &preds)) } @@ -493,7 +572,12 @@ impl RsExtraTreesClassifier { #[new] #[pyo3(signature = (n_estimators=100, max_depth=None, random_state=None))] fn new(n_estimators: usize, max_depth: Option, random_state: Option) -> Self { - Self { n_estimators, max_depth, random_state, fitted: None } + Self { + n_estimators, + max_depth, + random_state, + fitted: None, + } } fn fit(&mut self, x: PyReadonlyArray2<'_, f64>, y: PyReadonlyArray1<'_, i64>) -> PyResult<()> { @@ -502,18 +586,28 @@ impl RsExtraTreesClassifier { let mut m = ferrolearn_tree::ExtraTreesClassifier::::new() .with_n_estimators(self.n_estimators) .with_max_depth(self.max_depth); - if let Some(s) = self.random_state { m = m.with_random_state(s); } - let fitted = m.fit(&x_nd, &y_nd) + if let Some(s) = self.random_state { + m = m.with_random_state(s); + } + let fitted = m + .fit(&x_nd, &y_nd) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; 
self.fitted = Some(fitted); Ok(()) } - fn predict<'py>(&self, py: Python<'py>, x: PyReadonlyArray2<'_, f64>) -> PyResult>> { - let f = self.fitted.as_ref() + fn predict<'py>( + &self, + py: Python<'py>, + x: PyReadonlyArray2<'_, f64>, + ) -> PyResult>> { + let f = self + .fitted + .as_ref() .ok_or_else(|| pyo3::exceptions::PyRuntimeError::new_err("not fitted"))?; let x_nd = numpy2_to_ndarray(x); - let preds = f.predict(&x_nd) + let preds = f + .predict(&x_nd) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; Ok(ndarray1_usize_to_numpy(py, &preds)) } @@ -532,7 +626,12 @@ impl RsAdaBoostClassifier { #[new] #[pyo3(signature = (n_estimators=50, learning_rate=1.0, random_state=None))] fn new(n_estimators: usize, learning_rate: f64, random_state: Option) -> Self { - Self { n_estimators, learning_rate, random_state, fitted: None } + Self { + n_estimators, + learning_rate, + random_state, + fitted: None, + } } fn fit(&mut self, x: PyReadonlyArray2<'_, f64>, y: PyReadonlyArray1<'_, i64>) -> PyResult<()> { @@ -541,18 +640,28 @@ impl RsAdaBoostClassifier { let mut m = ferrolearn_tree::AdaBoostClassifier::::new() .with_n_estimators(self.n_estimators) .with_learning_rate(self.learning_rate); - if let Some(s) = self.random_state { m = m.with_random_state(s); } - let fitted = m.fit(&x_nd, &y_nd) + if let Some(s) = self.random_state { + m = m.with_random_state(s); + } + let fitted = m + .fit(&x_nd, &y_nd) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; self.fitted = Some(fitted); Ok(()) } - fn predict<'py>(&self, py: Python<'py>, x: PyReadonlyArray2<'_, f64>) -> PyResult>> { - let f = self.fitted.as_ref() + fn predict<'py>( + &self, + py: Python<'py>, + x: PyReadonlyArray2<'_, f64>, + ) -> PyResult>> { + let f = self + .fitted + .as_ref() .ok_or_else(|| pyo3::exceptions::PyRuntimeError::new_err("not fitted"))?; let x_nd = numpy2_to_ndarray(x); - let preds = f.predict(&x_nd) + let preds = f + .predict(&x_nd) .map_err(|e| 
pyo3::exceptions::PyValueError::new_err(e.to_string()))?; Ok(ndarray1_usize_to_numpy(py, &preds)) } @@ -571,8 +680,19 @@ pub struct RsGradientBoostingClassifier { impl RsGradientBoostingClassifier { #[new] #[pyo3(signature = (n_estimators=100, learning_rate=0.1, max_depth=Some(3), random_state=None))] - fn new(n_estimators: usize, learning_rate: f64, max_depth: Option, random_state: Option) -> Self { - Self { n_estimators, learning_rate, max_depth, random_state, fitted: None } + fn new( + n_estimators: usize, + learning_rate: f64, + max_depth: Option, + random_state: Option, + ) -> Self { + Self { + n_estimators, + learning_rate, + max_depth, + random_state, + fitted: None, + } } fn fit(&mut self, x: PyReadonlyArray2<'_, f64>, y: PyReadonlyArray1<'_, i64>) -> PyResult<()> { @@ -582,18 +702,28 @@ impl RsGradientBoostingClassifier { .with_n_estimators(self.n_estimators) .with_learning_rate(self.learning_rate) .with_max_depth(self.max_depth); - if let Some(s) = self.random_state { m = m.with_random_state(s); } - let fitted = m.fit(&x_nd, &y_nd) + if let Some(s) = self.random_state { + m = m.with_random_state(s); + } + let fitted = m + .fit(&x_nd, &y_nd) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; self.fitted = Some(fitted); Ok(()) } - fn predict<'py>(&self, py: Python<'py>, x: PyReadonlyArray2<'_, f64>) -> PyResult>> { - let f = self.fitted.as_ref() + fn predict<'py>( + &self, + py: Python<'py>, + x: PyReadonlyArray2<'_, f64>, + ) -> PyResult>> { + let f = self + .fitted + .as_ref() .ok_or_else(|| pyo3::exceptions::PyRuntimeError::new_err("not fitted"))?; let x_nd = numpy2_to_ndarray(x); - let preds = f.predict(&x_nd) + let preds = f + .predict(&x_nd) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; Ok(ndarray1_usize_to_numpy(py, &preds)) } @@ -612,8 +742,19 @@ pub struct RsHistGradientBoostingClassifier { impl RsHistGradientBoostingClassifier { #[new] #[pyo3(signature = (n_estimators=100, learning_rate=0.1, 
max_depth=None, random_state=None))] - fn new(n_estimators: usize, learning_rate: f64, max_depth: Option, random_state: Option) -> Self { - Self { n_estimators, learning_rate, max_depth, random_state, fitted: None } + fn new( + n_estimators: usize, + learning_rate: f64, + max_depth: Option, + random_state: Option, + ) -> Self { + Self { + n_estimators, + learning_rate, + max_depth, + random_state, + fitted: None, + } } fn fit(&mut self, x: PyReadonlyArray2<'_, f64>, y: PyReadonlyArray1<'_, i64>) -> PyResult<()> { @@ -623,18 +764,28 @@ impl RsHistGradientBoostingClassifier { .with_n_estimators(self.n_estimators) .with_learning_rate(self.learning_rate) .with_max_depth(self.max_depth); - if let Some(s) = self.random_state { m = m.with_random_state(s); } - let fitted = m.fit(&x_nd, &y_nd) + if let Some(s) = self.random_state { + m = m.with_random_state(s); + } + let fitted = m + .fit(&x_nd, &y_nd) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; self.fitted = Some(fitted); Ok(()) } - fn predict<'py>(&self, py: Python<'py>, x: PyReadonlyArray2<'_, f64>) -> PyResult>> { - let f = self.fitted.as_ref() + fn predict<'py>( + &self, + py: Python<'py>, + x: PyReadonlyArray2<'_, f64>, + ) -> PyResult>> { + let f = self + .fitted + .as_ref() .ok_or_else(|| pyo3::exceptions::PyRuntimeError::new_err("not fitted"))?; let x_nd = numpy2_to_ndarray(x); - let preds = f.predict(&x_nd) + let preds = f + .predict(&x_nd) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; Ok(ndarray1_usize_to_numpy(py, &preds)) } @@ -652,38 +803,51 @@ impl RsBaggingClassifier { #[new] #[pyo3(signature = (n_estimators=10, random_state=None))] fn new(n_estimators: usize, random_state: Option) -> Self { - Self { n_estimators, random_state, fitted: None } + Self { + n_estimators, + random_state, + fitted: None, + } } fn fit(&mut self, x: PyReadonlyArray2<'_, f64>, y: PyReadonlyArray1<'_, i64>) -> PyResult<()> { let x_nd = numpy2_to_ndarray(x); let y_nd = 
numpy1_to_ndarray_usize(y); - let mut m = ferrolearn_tree::BaggingClassifier::::new() - .with_n_estimators(self.n_estimators); - if let Some(s) = self.random_state { m = m.with_random_state(s); } - let fitted = m.fit(&x_nd, &y_nd) + let mut m = + ferrolearn_tree::BaggingClassifier::::new().with_n_estimators(self.n_estimators); + if let Some(s) = self.random_state { + m = m.with_random_state(s); + } + let fitted = m + .fit(&x_nd, &y_nd) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; self.fitted = Some(fitted); Ok(()) } - fn predict<'py>(&self, py: Python<'py>, x: PyReadonlyArray2<'_, f64>) -> PyResult>> { - let f = self.fitted.as_ref() + fn predict<'py>( + &self, + py: Python<'py>, + x: PyReadonlyArray2<'_, f64>, + ) -> PyResult>> { + let f = self + .fitted + .as_ref() .ok_or_else(|| pyo3::exceptions::PyRuntimeError::new_err("not fitted"))?; let x_nd = numpy2_to_ndarray(x); - let preds = f.predict(&x_nd) + let preds = f + .predict(&x_nd) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; Ok(ndarray1_usize_to_numpy(py, &preds)) } } py_classifier!( - RsNearestCentroid, "_RsNearestCentroid", + RsNearestCentroid, + "_RsNearestCentroid", ferrolearn_neighbors::FittedNearestCentroid, (), - { - ferrolearn_neighbors::NearestCentroid::::new() - } + { ferrolearn_neighbors::NearestCentroid::::new() } ); // =========================================================================== @@ -703,32 +867,49 @@ impl RsMiniBatchKMeans { #[new] #[pyo3(signature = (n_clusters=8, max_iter=100, random_state=None))] fn new(n_clusters: usize, max_iter: usize, random_state: Option) -> Self { - Self { n_clusters, max_iter, random_state, fitted: None } + Self { + n_clusters, + max_iter, + random_state, + fitted: None, + } } fn fit(&mut self, x: PyReadonlyArray2<'_, f64>) -> PyResult<()> { let x_nd = numpy2_to_ndarray(x); let mut m = ferrolearn_cluster::MiniBatchKMeans::::new(self.n_clusters) .with_max_iter(self.max_iter); - if let Some(s) = 
self.random_state { m = m.with_random_state(s); } - let fitted = m.fit(&x_nd, &()) + if let Some(s) = self.random_state { + m = m.with_random_state(s); + } + let fitted = m + .fit(&x_nd, &()) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; self.fitted = Some(fitted); Ok(()) } - fn predict<'py>(&self, py: Python<'py>, x: PyReadonlyArray2<'_, f64>) -> PyResult>> { - let f = self.fitted.as_ref() + fn predict<'py>( + &self, + py: Python<'py>, + x: PyReadonlyArray2<'_, f64>, + ) -> PyResult>> { + let f = self + .fitted + .as_ref() .ok_or_else(|| pyo3::exceptions::PyRuntimeError::new_err("not fitted"))?; let x_nd = numpy2_to_ndarray(x); - let preds = f.predict(&x_nd) + let preds = f + .predict(&x_nd) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; Ok(ndarray1_usize_to_numpy(py, &preds)) } #[getter] fn labels_<'py>(&self, py: Python<'py>) -> PyResult>> { - let f = self.fitted.as_ref() + let f = self + .fitted + .as_ref() .ok_or_else(|| pyo3::exceptions::PyRuntimeError::new_err("not fitted"))?; Ok(ndarray1_usize_to_numpy(py, f.labels())) } @@ -746,13 +927,18 @@ impl RsDBSCAN { #[new] #[pyo3(signature = (eps=0.5, min_samples=5))] fn new(eps: f64, min_samples: usize) -> Self { - Self { eps, min_samples, fitted: None } + Self { + eps, + min_samples, + fitted: None, + } } fn fit(&mut self, x: PyReadonlyArray2<'_, f64>) -> PyResult<()> { let x_nd = numpy2_to_ndarray(x); let m = ferrolearn_cluster::DBSCAN::::new(self.eps).with_min_samples(self.min_samples); - let fitted = m.fit(&x_nd, &()) + let fitted = m + .fit(&x_nd, &()) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; self.fitted = Some(fitted); Ok(()) @@ -760,7 +946,9 @@ impl RsDBSCAN { #[getter] fn labels_<'py>(&self, py: Python<'py>) -> PyResult>> { - let f = self.fitted.as_ref() + let f = self + .fitted + .as_ref() .ok_or_else(|| pyo3::exceptions::PyRuntimeError::new_err("not fitted"))?; let lbls = f.labels(); let arr: Array1 = lbls.mapv(|v| v as i64); 
@@ -779,13 +967,17 @@ impl RsAgglomerativeClustering { #[new] #[pyo3(signature = (n_clusters=2))] fn new(n_clusters: usize) -> Self { - Self { n_clusters, fitted: None } + Self { + n_clusters, + fitted: None, + } } fn fit(&mut self, x: PyReadonlyArray2<'_, f64>) -> PyResult<()> { let x_nd = numpy2_to_ndarray(x); let m = ferrolearn_cluster::AgglomerativeClustering::::new(self.n_clusters); - let fitted = m.fit(&x_nd, &()) + let fitted = m + .fit(&x_nd, &()) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; self.fitted = Some(fitted); Ok(()) @@ -793,7 +985,9 @@ impl RsAgglomerativeClustering { #[getter] fn labels_<'py>(&self, py: Python<'py>) -> PyResult>> { - let f = self.fitted.as_ref() + let f = self + .fitted + .as_ref() .ok_or_else(|| pyo3::exceptions::PyRuntimeError::new_err("not fitted"))?; Ok(ndarray1_usize_to_numpy(py, f.labels())) } @@ -811,14 +1005,21 @@ impl RsBirch { #[new] #[pyo3(signature = (n_clusters=None, threshold=0.5))] fn new(n_clusters: Option, threshold: f64) -> Self { - Self { n_clusters, threshold, fitted: None } + Self { + n_clusters, + threshold, + fitted: None, + } } fn fit(&mut self, x: PyReadonlyArray2<'_, f64>) -> PyResult<()> { let x_nd = numpy2_to_ndarray(x); let mut m = ferrolearn_cluster::Birch::::new().with_threshold(self.threshold); - if let Some(n) = self.n_clusters { m = m.with_n_clusters(n); } - let fitted = m.fit(&x_nd, &()) + if let Some(n) = self.n_clusters { + m = m.with_n_clusters(n); + } + let fitted = m + .fit(&x_nd, &()) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; self.fitted = Some(fitted); Ok(()) @@ -826,7 +1027,9 @@ impl RsBirch { #[getter] fn labels_<'py>(&self, py: Python<'py>) -> PyResult>> { - let f = self.fitted.as_ref() + let f = self + .fitted + .as_ref() .ok_or_else(|| pyo3::exceptions::PyRuntimeError::new_err("not fitted"))?; Ok(ndarray1_usize_to_numpy(py, f.labels())) } @@ -845,25 +1048,40 @@ impl RsGaussianMixture { #[new] #[pyo3(signature = (n_components=1, 
max_iter=100, random_state=None))] fn new(n_components: usize, max_iter: usize, random_state: Option) -> Self { - Self { n_components, max_iter, random_state, fitted: None } + Self { + n_components, + max_iter, + random_state, + fitted: None, + } } fn fit(&mut self, x: PyReadonlyArray2<'_, f64>) -> PyResult<()> { let x_nd = numpy2_to_ndarray(x); let mut m = ferrolearn_cluster::GaussianMixture::::new(self.n_components) .with_max_iter(self.max_iter); - if let Some(s) = self.random_state { m = m.with_random_state(s); } - let fitted = m.fit(&x_nd, &()) + if let Some(s) = self.random_state { + m = m.with_random_state(s); + } + let fitted = m + .fit(&x_nd, &()) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; self.fitted = Some(fitted); Ok(()) } - fn predict<'py>(&self, py: Python<'py>, x: PyReadonlyArray2<'_, f64>) -> PyResult>> { - let f = self.fitted.as_ref() + fn predict<'py>( + &self, + py: Python<'py>, + x: PyReadonlyArray2<'_, f64>, + ) -> PyResult>> { + let f = self + .fitted + .as_ref() .ok_or_else(|| pyo3::exceptions::PyRuntimeError::new_err("not fitted"))?; let x_nd = numpy2_to_ndarray(x); - let preds = f.predict(&x_nd) + let preds = f + .predict(&x_nd) .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; Ok(ndarray1_usize_to_numpy(py, &preds)) } @@ -927,42 +1145,48 @@ py_transformer!( // =========================================================================== py_transformer!( - RsMinMaxScaler, "_RsMinMaxScaler", + RsMinMaxScaler, + "_RsMinMaxScaler", ferrolearn_preprocess::FittedMinMaxScaler, (), { ferrolearn_preprocess::MinMaxScaler::::new() } ); py_transformer!( - RsMaxAbsScaler, "_RsMaxAbsScaler", + RsMaxAbsScaler, + "_RsMaxAbsScaler", ferrolearn_preprocess::FittedMaxAbsScaler, (), { ferrolearn_preprocess::MaxAbsScaler::::new() } ); py_transformer!( - RsRobustScaler, "_RsRobustScaler", + RsRobustScaler, + "_RsRobustScaler", ferrolearn_preprocess::FittedRobustScaler, (), { 
ferrolearn_preprocess::RobustScaler::::new() } ); py_transformer!( - RsPowerTransformer, "_RsPowerTransformer", + RsPowerTransformer, + "_RsPowerTransformer", ferrolearn_preprocess::FittedPowerTransformer, (), { ferrolearn_preprocess::PowerTransformer::::new() } ); py_transformer!( - RsNystroem, "_RsNystroem", + RsNystroem, + "_RsNystroem", ferrolearn_kernel::FittedNystroem, (), { ferrolearn_kernel::Nystroem::::new() } ); py_transformer!( - RsRBFSampler, "_RsRBFSampler", + RsRBFSampler, + "_RsRBFSampler", ferrolearn_kernel::FittedRBFSampler, (), { ferrolearn_kernel::RBFSampler::::new() } diff --git a/ferrolearn-test-oracle/src/lib.rs b/ferrolearn-test-oracle/src/lib.rs index d83c8cef..3140f481 100644 --- a/ferrolearn-test-oracle/src/lib.rs +++ b/ferrolearn-test-oracle/src/lib.rs @@ -270,7 +270,11 @@ pub fn assert_close_rows_sign_ambiguous( let use_pos = pos_max <= neg_max; for (j, (&a, &e)) in a_row.iter().zip(e_row.iter()).enumerate() { let threshold = abs.max(rel * e.abs()); - let diff = if use_pos { (a - e).abs() } else { (a + e).abs() }; + let diff = if use_pos { + (a - e).abs() + } else { + (a + e).abs() + }; assert!( diff <= threshold, "{label}[row {i}, col {j}]: actual={a} expected={e} \ @@ -324,7 +328,11 @@ pub fn assert_labels_equal(actual: &[i64], expected: &[i64], label: &str) { /// labels diverge only by permutation. 
#[must_use] pub fn adjusted_rand_index(a: &[i64], b: &[i64]) -> f64 { - assert_eq!(a.len(), b.len(), "ARI: label arrays must be the same length"); + assert_eq!( + a.len(), + b.len(), + "ARI: label arrays must be the same length" + ); let n = a.len(); if n == 0 { return 1.0; @@ -347,12 +355,14 @@ pub fn adjusted_rand_index(a: &[i64], b: &[i64]) -> f64 { let col_sums: Vec = (0..classes_b.len()) .map(|j| cont.iter().map(|r| r[j]).sum()) .collect(); - let comb2 = |k: u64| if k < 2 { 0u128 } else { u128::from(k) * (u128::from(k) - 1) / 2 }; - let sum_comb_cont: u128 = cont - .iter() - .flat_map(|r| r.iter()) - .map(|&v| comb2(v)) - .sum(); + let comb2 = |k: u64| { + if k < 2 { + 0u128 + } else { + u128::from(k) * (u128::from(k) - 1) / 2 + } + }; + let sum_comb_cont: u128 = cont.iter().flat_map(|r| r.iter()).map(|&v| comb2(v)).sum(); let sum_comb_row: u128 = row_sums.iter().map(|&v| comb2(v)).sum(); let sum_comb_col: u128 = col_sums.iter().map(|&v| comb2(v)).sum(); let total = comb2(n as u64); @@ -428,9 +438,9 @@ pub fn json_to_labels(value: &serde_json::Value) -> Vec { .iter() .map(|v| { v.as_i64().unwrap_or_else(|| { - v.as_f64().map(|x| x as i64).unwrap_or_else(|| { - panic!("label must be int or float-int, got {v}") - }) + v.as_f64() + .map(|x| x as i64) + .unwrap_or_else(|| panic!("label must be int or float-int, got {v}")) }) }) .collect() diff --git a/ferrolearn-tree/src/adaboost.rs b/ferrolearn-tree/src/adaboost.rs index 36367d1d..175bdef7 100644 --- a/ferrolearn-tree/src/adaboost.rs +++ b/ferrolearn-tree/src/adaboost.rs @@ -681,8 +681,7 @@ impl FittedAdaBoostClassifier { .. } = tree_nodes[leaf_idx] { - let log_probs: Vec = - dist.iter().map(|&p| p.max(eps).ln()).collect(); + let log_probs: Vec = dist.iter().map(|&p| p.max(eps).ln()).collect(); let mean_log: F = log_probs.iter().copied().fold(F::zero(), |a, b| a + b) / k_f; for k in 0..n_classes { @@ -784,13 +783,11 @@ impl FittedAdaBoostClassifier { .. 
} = tree_nodes[leaf_idx] { - let log_probs: Vec = - dist.iter().map(|&p| p.max(eps).ln()).collect(); + let log_probs: Vec = dist.iter().map(|&p| p.max(eps).ln()).collect(); let mean_log: F = log_probs.iter().copied().fold(F::zero(), |a, b| a + b) / k_f; for k in 0..n_classes { - out[[i, k]] = - out[[i, k]] + k_minus_1 * (log_probs[k] - mean_log); + out[[i, k]] = out[[i, k]] + k_minus_1 * (log_probs[k] - mean_log); } } else if let Node::Leaf { value, .. } = tree_nodes[leaf_idx] { let class_idx = value.to_f64().map_or(0, |f| f.round() as usize); diff --git a/ferrolearn-tree/src/adaboost_regressor.rs b/ferrolearn-tree/src/adaboost_regressor.rs index 985f57f7..4f268632 100644 --- a/ferrolearn-tree/src/adaboost_regressor.rs +++ b/ferrolearn-tree/src/adaboost_regressor.rs @@ -521,7 +521,6 @@ fn resample_weighted(weights: &[F], n: usize) -> Vec { #[cfg(test)] mod tests { use super::*; - use ndarray::array; #[test] fn test_adaboost_regressor_simple() { diff --git a/ferrolearn-tree/src/decision_tree.rs b/ferrolearn-tree/src/decision_tree.rs index 8a542149..7a47a75b 100644 --- a/ferrolearn-tree/src/decision_tree.rs +++ b/ferrolearn-tree/src/decision_tree.rs @@ -888,12 +888,8 @@ fn build_classification_tree( // Reborrow the rng for the split-finder; recursive children get fresh // reborrows via `rng.as_deref_mut()` below. 
- let best = find_best_classification_split( - data, - indices, - params.min_samples_leaf, - rng.as_deref_mut(), - ); + let best = + find_best_classification_split(data, indices, params.min_samples_leaf, rng.as_deref_mut()); if let Some((best_feature, best_threshold, best_impurity_decrease)) = best { let (left_indices, right_indices): (Vec, Vec) = indices @@ -915,14 +911,8 @@ fn build_classification_tree( params, rng.as_deref_mut(), ); - let right_idx = build_classification_tree( - data, - &right_indices, - nodes, - depth + 1, - params, - rng.as_deref_mut(), - ); + let right_idx = + build_classification_tree(data, &right_indices, nodes, depth + 1, params, rng); nodes[node_idx] = Node::Split { feature: best_feature, @@ -1069,12 +1059,8 @@ fn build_regression_tree( return idx; } - let best = find_best_regression_split( - data, - indices, - params.min_samples_leaf, - rng.as_deref_mut(), - ); + let best = + find_best_regression_split(data, indices, params.min_samples_leaf, rng.as_deref_mut()); if let Some((best_feature, best_threshold, best_impurity_decrease)) = best { let (left_indices, right_indices): (Vec, Vec) = indices @@ -1096,14 +1082,7 @@ fn build_regression_tree( params, rng.as_deref_mut(), ); - let right_idx = build_regression_tree( - data, - &right_indices, - nodes, - depth + 1, - params, - rng.as_deref_mut(), - ); + let right_idx = build_regression_tree(data, &right_indices, nodes, depth + 1, params, rng); nodes[node_idx] = Node::Split { feature: best_feature, @@ -1280,8 +1259,7 @@ pub(crate) fn aggregate_tree_importances( Some(map) => map[t][*feature], None => *feature, }; - total_imp[original_feature] = - total_imp[original_feature] + w * *impurity_decrease; + total_imp[original_feature] = total_imp[original_feature] + w * *impurity_decrease; } } } diff --git a/ferrolearn-tree/src/gradient_boosting.rs b/ferrolearn-tree/src/gradient_boosting.rs index 69c68ebd..994dced8 100644 --- a/ferrolearn-tree/src/gradient_boosting.rs +++ 
b/ferrolearn-tree/src/gradient_boosting.rs @@ -996,6 +996,7 @@ impl FittedGradientBoostingClassifier { /// /// Returns [`FerroError::ShapeMismatch`] if the number of features /// does not match the fitted model. + #[allow(clippy::needless_range_loop)] // index-by-class loop is natural for the per-class score accumulation pub fn predict_proba(&self, x: &Array2) -> Result, FerroError> { if x.ncols() != self.n_features { return Err(FerroError::ShapeMismatch { diff --git a/ferrolearn-tree/src/hist_gradient_boosting.rs b/ferrolearn-tree/src/hist_gradient_boosting.rs index f3fd580b..ad9274fa 100644 --- a/ferrolearn-tree/src/hist_gradient_boosting.rs +++ b/ferrolearn-tree/src/hist_gradient_boosting.rs @@ -1975,6 +1975,7 @@ impl FittedHistGradientBoostingClassifier { /// /// Returns [`FerroError::ShapeMismatch`] if the number of features /// does not match the fitted model. + #[allow(clippy::needless_range_loop)] // index-by-class loop is natural for the per-class score accumulation pub fn predict_proba(&self, x: &Array2) -> Result, FerroError> { if x.ncols() != self.n_features { return Err(FerroError::ShapeMismatch { diff --git a/ferrolearn-tree/tests/api_proof.rs b/ferrolearn-tree/tests/api_proof.rs index 41017b9f..117a5922 100644 --- a/ferrolearn-tree/tests/api_proof.rs +++ b/ferrolearn-tree/tests/api_proof.rs @@ -47,7 +47,9 @@ fn two_clusters_2d() -> (Array2, Array1, Array1) { ) .unwrap(); let y_cls = array![0usize, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]; - let y_reg = array![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0]; + let y_reg = array![ + 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0 + ]; (x, y_cls, y_reg) } @@ -75,7 +77,7 @@ fn assert_importances_well_formed(imp: &Array1, n_features: usize) { "feature_importances should sum to 1 or be all zeros; got sum = {total}" ); for &v in imp.iter() { - assert!(v >= 0.0 && v <= 1.0, "importance {v} outside [0, 1]"); + assert!((0.0..=1.0).contains(&v), "importance {v} outside [0, 1]"); } } 
@@ -512,13 +514,17 @@ fn api_proof_random_trees_embedding() { fn api_proof_f32_compiles() { let x = Array2::from_shape_vec( (8, 2), - vec![0.0f32, 0.0, 0.5, 0.0, 0.0, 0.5, 0.5, 0.5, 5.0, 5.0, 5.5, 5.0, 5.0, 5.5, 5.5, 5.5], + vec![ + 0.0f32, 0.0, 0.5, 0.0, 0.0, 0.5, 0.5, 0.5, 5.0, 5.0, 5.5, 5.0, 5.0, 5.5, 5.5, 5.5, + ], ) .unwrap(); let y_cls = array![0usize, 0, 0, 0, 1, 1, 1, 1]; let y_reg = array![1.0f32, 2.0, 3.0, 4.0, 10.0, 11.0, 12.0, 13.0]; - let _ = DecisionTreeClassifier::::new().fit(&x, &y_cls).unwrap(); + let _ = DecisionTreeClassifier::::new() + .fit(&x, &y_cls) + .unwrap(); let _ = DecisionTreeRegressor::::new().fit(&x, &y_reg).unwrap(); let _ = ExtraTreeClassifier::::new().fit(&x, &y_cls).unwrap(); let _ = ExtraTreeRegressor::::new().fit(&x, &y_reg).unwrap(); diff --git a/ferrolearn-tree/tests/conformance_sklearn.rs b/ferrolearn-tree/tests/conformance_sklearn.rs index 9c3d30d5..61036370 100644 --- a/ferrolearn-tree/tests/conformance_sklearn.rs +++ b/ferrolearn-tree/tests/conformance_sklearn.rs @@ -31,8 +31,8 @@ use ferrolearn_core::introspection::HasFeatureImportances; use ferrolearn_core::{Fit, Predict}; use ferrolearn_test_oracle::{ - assert_close, json_to_array1, json_to_array2, json_to_labels, load_fixture, - TOL_TREE_ENSEMBLE_ABS, TOL_TREE_ENSEMBLE_REL, + TOL_TREE_ENSEMBLE_ABS, TOL_TREE_ENSEMBLE_REL, assert_close, json_to_array1, json_to_array2, + json_to_labels, load_fixture, }; use ndarray::Array1; @@ -49,7 +49,11 @@ const QUALITY_FLOOR: f64 = 0.95; /// Classification accuracy on integer label arrays. fn accuracy(preds: &Array1, targets: &Array1) -> f64 { assert_eq!(preds.len(), targets.len()); - let correct = preds.iter().zip(targets.iter()).filter(|(a, b)| a == b).count(); + let correct = preds + .iter() + .zip(targets.iter()) + .filter(|(a, b)| a == b) + .count(); correct as f64 / targets.len() as f64 } @@ -130,13 +134,7 @@ fn check_feature_importances( // comparison at ensemble tolerance is the right check. 
We assert per-index // rather than slice so we get a useful first-failure index in the panic. for (i, (&a, &e)) in actual.iter().zip(expected.iter()).enumerate() { - assert_close( - a, - e, - rel, - abs, - &format!("{label}.feature_importances[{i}]"), - ); + assert_close(a, e, rel, abs, &format!("{label}.feature_importances[{i}]")); } } @@ -331,7 +329,9 @@ fn conformance_gradient_boosting_classifier() { .with_learning_rate(learning_rate) .with_random_state(seed); let fitted = model.fit(&x, &y).expect("GradientBoostingClassifier fit"); - let preds = fitted.predict(&x).expect("GradientBoostingClassifier predict"); + let preds = fitted + .predict(&x) + .expect("GradientBoostingClassifier predict"); let sklearn_accuracy = fx.expected["accuracy"].as_f64().unwrap(); let acc = accuracy(&preds, &y); @@ -376,7 +376,9 @@ fn conformance_gradient_boosting_regressor() { .with_learning_rate(learning_rate) .with_random_state(seed); let fitted = model.fit(&x, &y).expect("GradientBoostingRegressor fit"); - let preds = fitted.predict(&x).expect("GradientBoostingRegressor predict"); + let preds = fitted + .predict(&x) + .expect("GradientBoostingRegressor predict"); let sklearn_r2 = fx.expected["r2"].as_f64().unwrap(); let r2v = r2(&preds, &y); diff --git a/ferrolearn-tree/tests/conformance_surface_coverage.rs b/ferrolearn-tree/tests/conformance_surface_coverage.rs index 1fe55320..a49e138c 100644 --- a/ferrolearn-tree/tests/conformance_surface_coverage.rs +++ b/ferrolearn-tree/tests/conformance_surface_coverage.rs @@ -5,7 +5,7 @@ //! exclusion OR mentioned by leaf name in at least one test source file. //! Modeled on ferrolearn-linear/tests/conformance_surface_coverage.rs. 
-use ferrolearn_test_oracle::{assert_surface_covered, SurfaceExclusions, SurfaceInventory}; +use ferrolearn_test_oracle::{SurfaceExclusions, SurfaceInventory, assert_surface_covered}; use std::path::{Path, PathBuf}; fn crate_root() -> PathBuf { @@ -18,8 +18,12 @@ fn test_dir() -> PathBuf { #[test] fn surface_coverage_gate() { - let inv_path = test_dir().join("conformance").join("_surface_inventory.toml"); - let exc_path = test_dir().join("conformance").join("_surface_exclusions.toml"); + let inv_path = test_dir() + .join("conformance") + .join("_surface_inventory.toml"); + let exc_path = test_dir() + .join("conformance") + .join("_surface_exclusions.toml"); let inventory = SurfaceInventory::load(&inv_path); let exclusions = SurfaceExclusions::load(&exc_path); if inventory.items.is_empty() { @@ -39,10 +43,7 @@ fn surface_coverage_gate() { } v }; - let paths: Vec<&Path> = candidate_test_files - .iter() - .map(PathBuf::as_path) - .collect(); + let paths: Vec<&Path> = candidate_test_files.iter().map(PathBuf::as_path).collect(); assert!( !paths.is_empty(), "no .rs test files found under {}", diff --git a/ferrolearn-tree/tests/conformance_wave3.rs b/ferrolearn-tree/tests/conformance_wave3.rs index 3aecb9ff..36b41981 100644 --- a/ferrolearn-tree/tests/conformance_wave3.rs +++ b/ferrolearn-tree/tests/conformance_wave3.rs @@ -56,7 +56,11 @@ fn conformance_extra_tree_regressor() { let y_mean = y.iter().sum::() / y.len() as f64; let ss_tot: f64 = y.iter().map(|v| (v - y_mean).powi(2)).sum(); - let ss_res: f64 = preds.iter().zip(y.iter()).map(|(a, e)| (a - e).powi(2)).sum(); + let ss_res: f64 = preds + .iter() + .zip(y.iter()) + .map(|(a, e)| (a - e).powi(2)) + .sum(); let r2 = 1.0 - ss_res / ss_tot; let expected_r2 = fx.expected["r2"].as_f64().unwrap_or(0.5); assert!( @@ -116,7 +120,11 @@ fn conformance_extra_trees_regressor() { let y_mean = y.iter().sum::() / y.len() as f64; let ss_tot: f64 = y.iter().map(|v| (v - y_mean).powi(2)).sum(); - let ss_res: f64 = 
preds.iter().zip(y.iter()).map(|(a, e)| (a - e).powi(2)).sum(); + let ss_res: f64 = preds + .iter() + .zip(y.iter()) + .map(|(a, e)| (a - e).powi(2)) + .sum(); let r2 = 1.0 - ss_res / ss_tot; let expected_r2 = fx.expected["r2"].as_f64().unwrap_or(0.5); assert!( @@ -172,7 +180,11 @@ fn conformance_bagging_regressor() { let y_mean = y.iter().sum::() / y.len() as f64; let ss_tot: f64 = y.iter().map(|v| (v - y_mean).powi(2)).sum(); - let ss_res: f64 = preds.iter().zip(y.iter()).map(|(a, e)| (a - e).powi(2)).sum(); + let ss_res: f64 = preds + .iter() + .zip(y.iter()) + .map(|(a, e)| (a - e).powi(2)) + .sum(); let r2 = 1.0 - ss_res / ss_tot; let expected_r2 = fx.expected["r2"].as_f64().unwrap_or(0.5); assert!( @@ -203,7 +215,11 @@ fn conformance_adaboost_regressor() { let y_mean = y.iter().sum::() / y.len() as f64; let ss_tot: f64 = y.iter().map(|v| (v - y_mean).powi(2)).sum(); - let ss_res: f64 = preds.iter().zip(y.iter()).map(|(a, e)| (a - e).powi(2)).sum(); + let ss_res: f64 = preds + .iter() + .zip(y.iter()) + .map(|(a, e)| (a - e).powi(2)) + .sum(); let r2 = 1.0 - ss_res / ss_tot; let expected_r2 = fx.expected["r2"].as_f64().unwrap_or(0.5); assert!( @@ -267,7 +283,11 @@ fn conformance_hist_gradient_boosting_regressor() { let y_mean = y.iter().sum::() / y.len() as f64; let ss_tot: f64 = y.iter().map(|v| (v - y_mean).powi(2)).sum(); - let ss_res: f64 = preds.iter().zip(y.iter()).map(|(a, e)| (a - e).powi(2)).sum(); + let ss_res: f64 = preds + .iter() + .zip(y.iter()) + .map(|(a, e)| (a - e).powi(2)) + .sum(); let r2 = 1.0 - ss_res / ss_tot; let expected_r2 = fx.expected["r2"].as_f64().unwrap_or(0.5); assert!( @@ -329,7 +349,9 @@ fn conformance_random_trees_embedding() { .with_max_depth(max_depth) .with_random_state(random_state); let fitted = model.fit(&x, &()).expect("RandomTreesEmbedding fit"); - let xt = fitted.transform(&x).expect("RandomTreesEmbedding transform"); + let xt = fitted + .transform(&x) + .expect("RandomTreesEmbedding transform"); 
assert_eq!(xt.nrows(), x.nrows(), "embedding rows"); // Expansion factor: each tree contributes one one-hot encoded leaf // index; ferrolearn may use a denser binary representation. @@ -355,8 +377,8 @@ fn conformance_voting_classifier() { // ferrolearn's VotingClassifier uses a Vec> of max-depths // for an ensemble of DTs — not arbitrary base estimators like sklearn. // Use a 2-tree ensemble matching the fixture's LR+DT pair pattern. - let model = ferrolearn_tree::VotingClassifier::::new() - .with_max_depths(vec![Some(5), Some(5)]); + let model = + ferrolearn_tree::VotingClassifier::::new().with_max_depths(vec![Some(5), Some(5)]); let fitted = model.fit(&x, &y).expect("VotingClassifier fit"); let preds = fitted.predict(&x).expect("VotingClassifier predict"); let expected_acc = fx.expected["accuracy"].as_f64().unwrap_or(0.5); @@ -375,16 +397,23 @@ fn conformance_voting_regressor() { let x = json_to_array2(&fx.input["X"]); let y = json_to_array1(&fx.input["y"]); - let model = ferrolearn_tree::VotingRegressor::::new() - .with_max_depths(vec![Some(5), Some(5)]); + let model = + ferrolearn_tree::VotingRegressor::::new().with_max_depths(vec![Some(5), Some(5)]); let fitted = model.fit(&x, &y).expect("VotingRegressor fit"); let preds = fitted.predict(&x).expect("VotingRegressor predict"); let y_mean = y.iter().sum::() / y.len() as f64; let ss_tot: f64 = y.iter().map(|v| (v - y_mean).powi(2)).sum(); - let ss_res: f64 = preds.iter().zip(y.iter()).map(|(a, e)| (a - e).powi(2)).sum(); + let ss_res: f64 = preds + .iter() + .zip(y.iter()) + .map(|(a, e)| (a - e).powi(2)) + .sum(); let r2 = 1.0 - ss_res / ss_tot; // R² >= 0 means the model beats predicting the mean — minimal sanity. // Not comparable to sklearn LR+DT directly; ferrolearn uses DT-only. - assert!(r2 >= 0.0, "VotingRegressor R² {r2:.4} below baseline (mean prediction)"); + assert!( + r2 >= 0.0, + "VotingRegressor R² {r2:.4} below baseline (mean prediction)" + ); }