Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v6
- name: Install uv
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.1.0
with:
enable-cache: true
- name: Run pre-commit
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/docs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v6
- name: Setup uv
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.1.0
with:
python-version: '3.14'
enable-cache: true
- name: Setup just
uses: extractions/setup-just@v3
uses: extractions/setup-just@v4
- name: Setup Khiops
run: |
sudo apt-get update -y && sudo apt-get install wget lsb-release -y
Expand All @@ -35,7 +35,7 @@ jobs:
mkdir docs/_static
just docs
- name: Upload the package as an artifact
uses: actions/upload-pages-artifact@v4
uses: actions/upload-pages-artifact@v5
with:
path: ./docs/_build/html
publish:
Expand All @@ -44,7 +44,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Setup Github Pages
uses: actions/configure-pages@v5
uses: actions/configure-pages@v6
- name: Deploy docs to GitHub Pages
id: deployment
uses: actions/deploy-pages@v4
uses: actions/deploy-pages@v5
8 changes: 4 additions & 4 deletions .github/workflows/package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout sources
uses: actions/checkout@v4
uses: actions/checkout@v6
- name: Setup uv
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.1.0
with:
python-version: '3.14'
enable-cache: true
Expand All @@ -44,7 +44,7 @@ jobs:
id-token: write
steps:
- name: Download package artifact
uses: actions/download-artifact@v6
uses: actions/download-artifact@v8
with:
name: pip-package
path: dist/
Expand All @@ -61,7 +61,7 @@ jobs:
id-token: write
steps:
- name: Download package artifact
uses: actions/download-artifact@v6
uses: actions/download-artifact@v8
with:
name: pip-package
path: dist/
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,16 @@ jobs:
os: ['ubuntu-latest', 'windows-latest', 'macos-latest']
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v6
- name: Install uv
uses: astral-sh/setup-uv@v6
uses: astral-sh/setup-uv@v8.1.0
with:
python-version: ${{ matrix.python-version }}
enable-cache: true
- name: Setup just
uses: extractions/setup-just@v3
uses: extractions/setup-just@v4
- name: Install Miniforge
uses: conda-incubator/setup-miniconda@v3
uses: conda-incubator/setup-miniconda@v4
with:
auto-update-conda: true
miniforge-version: latest
Expand Down
9 changes: 8 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## [0.5] - 2026-04-30

### Fixed

- `compute_dirac_indexes`: Incorrect result for single-bin histograms.

## [0.4] - 2026-01-05

### Fixed
Expand All @@ -23,7 +29,8 @@
- `calibration_error` function
- `build_reliability_diagram` function

[Unreleased]: https://github.com/khiopslab/khalib/compare/0.4...HEAD
[Unreleased]: https://github.com/khiopslab/khalib/compare/0.5...HEAD
[0.5]: https://github.com/khiopslab/khalib/compare/0.4...0.5
[0.4]: https://github.com/khiopslab/khalib/compare/0.3...0.4
[0.3]: https://github.com/khiopslab/khalib/compare/0.2...0.3
[0.2]: https://github.com/khiopslab/khalib/compare/0.1...0.2
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "khalib"
version = "0.4"
version = "0.5"
description = "Classifier Calibration with Khiops"
authors = [{ name = "Felipe Olmos", email = "luisfelipe.olmosmarchant@orange.com" }]
requires-python = ">=3.11"
Expand Down
36 changes: 20 additions & 16 deletions src/khalib/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,18 +304,16 @@ def from_data(
y_indexes = le.transform(y)
x_bin_indexes = (
np.searchsorted(breakpoints[:-1], x, side="left") - 1
).reshape(1, -1)
).reshape(-1)
x_bin_indexes[x_bin_indexes < 0] = 0
# Note: reshape is needed for the 1-column matrix case if not the
# iterator below does not work

# Compute the target frequencies
target_freqs = [[0 for _ in le.classes_] for _ in breakpoints[:-1]]
for y_score_bin_index, y_index in np.nditer(
[x_bin_indexes, y_indexes]
):
target_freqs[y_score_bin_index][y_index] += 1
target_freqs = [tuple(freqs) for freqs in target_freqs]
target_freqs = [tuple(tf) for tf in target_freqs]
# Otherwise there is just one interval
else:
# Non-informative variable: histogram with only the bin (min, max)
Expand Down Expand Up @@ -417,8 +415,10 @@ def __post_init__(self):
f"the number of classes: {len(tfreqs)} != {len(self.classes)}."
)
if sum(tfreqs) != self.freqs[i]:
f"`target_freqs` at bin index {i} sums different from the bin "
f"frequency: {sum(tfreqs)} != {self.freqs[i]}"
raise ValueError(
f"`target_freqs` at bin index {i} sums different from the bin "
f"frequency: {sum(tfreqs)} != {self.freqs[i]}"
)

# Initialize the densities and target probabilities
self.densities = [
Expand Down Expand Up @@ -797,11 +797,6 @@ def fit(self, X, y): # noqa: N803
for k in range(n_classes)
]

calibrated_probas = np.empty(scores.shape)
for k, histogram in enumerate(self.histograms_):
calibrated_probas[:, k] = calibrate_binary(
scores[:, k], histogram, only_positive=True
)
return self

def predict_proba(self, X): # noqa: N803
Expand Down Expand Up @@ -913,7 +908,7 @@ def build_reliability_diagram(
*Density plot only:* If the log-difference between the maximal and minimal
positive density values is larger than 'log_plot_threshold' then the density
plot uses a log scale in the y-axis.
min_density_bar_width : float, default=5.0e-03
min_density_bar_width : float, default=2.5e-03
*Density plot only:* If a bin of the scores' unsupervised histogram has a width
lower than 'min_density_bar_width' then it is plotted as having a width of
'min_density_bar_width'.
Expand All @@ -935,7 +930,7 @@ def build_reliability_diagram(
)
if len(y.shape) > 1:
raise ValueError(
f"'y_scores' must be a 1-D array-like object, but its shape is {y.shape}"
f"'y' must be a 1-D array-like object, but its shape is {y.shape}"
)
if dirac_threshold <= 0.0:
raise ValueError("'dirac_threshold' must be positive")
Expand Down Expand Up @@ -1049,7 +1044,7 @@ def build_reliability_diagram(
)
if freq > 0
]
density_bar_log_range = None
density_bar_log_range = 0
if density_bar_heights:
density_bar_log_range = math.log10(max(density_bar_heights)) - math.log10(
min(density_bar_heights)
Expand Down Expand Up @@ -1078,10 +1073,19 @@ def compute_dirac_indexes(uhist, dirac_threshold):

We declare a dirac mass bin if:

- it is surrounded by empty bins.
- it is surrounded by empty bins, or it is the only bin
- its length is less than ``dirac_threshold``

"""

# Early return for just one bin
if len(uhist.freqs) == 1:
if uhist.bins[0][1] - uhist.bins[0][0] < dirac_threshold:
return [True]
else:
return [False]

# More than one bin
dirac_indexes = []
if (
len(uhist.freqs) > 1
Expand All @@ -1102,7 +1106,7 @@ def compute_dirac_indexes(uhist, dirac_threshold):
else:
dirac_indexes.append(False)
if (
len(uhist.freqs) > 2
len(uhist.freqs) > 1
and uhist.freqs[-2] == 0
and (uhist.bins[-1][1] - uhist.bins[-1][0] < dirac_threshold)
):
Expand Down
15 changes: 14 additions & 1 deletion src/tests/test_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,19 @@ def test_find_vfind_coherence(self, y_fixtures, y_scores_fixtures):
[histogram.find(score) for score in histogram.breakpoints],
)

def test_find_vfind_boundary_coherence(self):
    """Check that ``vfind`` matches element-wise ``find`` on boundary values."""
    hist = Histogram(
        breakpoints=[0.0, 0.3, 0.7, 1.0],
        freqs=[10, 20, 15],
    )
    # Probe values: out of range on both sides, exactly on every breakpoint,
    # and strictly inside each of the three bins.
    probes = [-0.5, 0.0, 0.15, 0.3, 0.5, 0.7, 0.85, 1.0, 1.5]
    vectorized = hist.vfind(probes)
    scalar = [hist.find(probe) for probe in probes]
    np.testing.assert_array_equal(vectorized, scalar)

@pytest.mark.parametrize("y_scores_fixture", ["original", "constant"])
@pytest.mark.parametrize("method", ["eq-freq", "eq-width", "khiops"])
def test_manual_vs_khiops_coherence(
Expand Down Expand Up @@ -452,7 +465,7 @@ def test_uniform(self):
rng = np.random.default_rng(seed=1234567)
y_scores = rng.uniform(size=2000)
uhist = Histogram.from_data(y_scores, use_finest=True)
assert compute_dirac_indexes(uhist, 1e-06) == [False, False]
assert compute_dirac_indexes(uhist, 1e-06) == [False]

def test_dirac(self):
y_scores = np.array([0.2] * 250 + [0.5] * 250 + [0.9] * 500)
Expand Down
Loading