Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
f4e1cf2
test: de-flake file server tests with port polling instead of fixed s…
blooop Jun 12, 2026
4f6db7f
test: resolve long-skipped tests
blooop Jun 12, 2026
1b71829
test: unit tests for BenchResult container (results/bench_result.py)
blooop Jun 12, 2026
f02775e
test: unit tests for DataSetResult (results/dataset_result.py)
blooop Jun 12, 2026
86093f6
test: unit tests for VolumeResult (results/volume_result.py)
blooop Jun 12, 2026
83a9b12
test: unit tests for ComposableContainerDataset (composable_container…
blooop Jun 12, 2026
700ba0e
test: unit tests for bench_cfg (BenchPlotSrvCfg, BenchRunCfg, BenchCfg)
blooop Jun 12, 2026
d712d58
test: unit tests for WorkerJob (bencher/worker_job.py)
blooop Jun 12, 2026
6533329
test: unit tests for CurveResult (holoview_results/curve_result.py)
blooop Jun 12, 2026
60a1637
test: unit tests for BandResult (holoview_results/band_result.py)
blooop Jun 12, 2026
5823184
test: unit tests for TableResult (holoview_results/table_result.py)
blooop Jun 12, 2026
475794d
test: unit tests for ScatterResult (holoview_results/scatter_result.py)
blooop Jun 12, 2026
4cc7472
test: unit tests for BarResult (holoview_results/bar_result.py)
blooop Jun 12, 2026
006f84d
test: unit tests for BoxWhiskerResult (distribution_result/box_whiske…
blooop Jun 12, 2026
c54c3e6
test: unit tests for ViolinResult (distribution_result/violin_result.py)
blooop Jun 12, 2026
24818fa
test: unit tests for ScatterJitterResult (distribution_result/scatter…
blooop Jun 12, 2026
4c75f4e
test: unit tests for HistogramResult (results/histogram_result.py)
blooop Jun 12, 2026
8372806
test: unit tests for OptimizeResult (results/optimize_result.py)
blooop Jun 12, 2026
f8eee40
fix lint: pylint disables for fixtures and test-class counters
blooop Jun 12, 2026
50ca54a
fix: show result-var units on histogram x-axis
blooop Jun 12, 2026
e9a91fc
test: consolidate duplicated result-test helpers into test/helpers.py
blooop Jun 12, 2026
c5f7936
refactor: consolidate duplicated _run_sweep helpers into test/helpers.py
blooop Jun 12, 2026
86f5353
fix: bump version to 1.107.0 (1.106.x already released on main)
blooop Jun 12, 2026
8c8b3a6
Merge origin/main (resolve version to 1.107.0)
blooop Jun 12, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions bencher/results/histogram_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,12 @@ def to_plot(

def _make_histogram(self, dataset: xr.Dataset, result_var: Parameter, **kwargs):
"""Render a single histogram from a dataset (no over_time handling)."""
units = getattr(result_var, "units", "") or ""
xlabel = f"{result_var.name} [{units}]" if units else result_var.name
plot = dataset.hvplot(
kind="hist",
y=[result_var.name],
xlabel=xlabel,
ylabel="count",
legend="bottom_right",
title=f"{result_var.name} vs Count",
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "holobench"
version = "1.106.2"
version = "1.107.0"

authors = [{ name = "Austin Gregg-Smith", email = "blooop@gmail.com" }]
description = "A package for benchmarking the performance of arbitrary functions"
Expand Down
64 changes: 64 additions & 0 deletions test/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""Shared helpers for result-type unit tests.

These small utilities were previously copy-pasted across several
``test_*_result.py`` modules; centralising them keeps the unwrap/inner-element
and run-config logic consistent in one place.
"""

from __future__ import annotations

import bencher as bn


def unwrap_hv(obj):
    """Peel wrapper layers off ``obj`` until the innermost object is reached.

    A layer exposing ``object`` is replaced by that attribute; otherwise a layer
    exposing ``objects`` is replaced by its first entry (the container must be
    non-empty). The first value with neither attribute is returned.
    """
    current = obj
    while True:
        wraps_single = hasattr(current, "object")
        if not wraps_single and not hasattr(current, "objects"):
            return current
        if wraps_single:
            current = current.object
        else:
            children = current.objects
            assert len(children) > 0
            current = children[0]


def inner_element(overlay):
    """Return the sole item yielded by iterating ``overlay``.

    Asserts that the iterable contains exactly one item.
    """
    elements = [*overlay]
    assert len(elements) == 1
    (only,) = elements
    return only


def run_cfg_with(repeats: int) -> bn.BenchRunCfg:
    """Build a ``BenchRunCfg`` for ``repeats`` repeats with result/sample caching and auto-plot off."""
    settings = {
        "repeats": repeats,
        "cache_results": False,
        "cache_samples": False,
        "auto_plot": False,
    }
    return bn.BenchRunCfg(**settings)


def run_named_sweep(bench_class, name, input_vars, result_vars, repeats=1):
    """Instantiate ``bench_class`` in a ``Bench`` called ``name`` and sweep it.

    The run configuration comes from ``run_cfg_with(repeats)`` and the sweep is
    invoked with ``plot_callbacks=False``. ``name`` is used both for the bench
    and for the sweep title. Returns whatever ``plot_sweep`` returns.
    """
    worker = bench_class()
    cfg = run_cfg_with(repeats)
    bench = bn.Bench(name, worker, run_cfg=cfg)
    sweep_kwargs = {
        "input_vars": input_vars,
        "result_vars": result_vars,
        "plot_callbacks": False,
    }
    return bench.plot_sweep(name, **sweep_kwargs)


def run_dist_sweep(worker_cls, input_vars, repeats, name_prefix):
    """Sweep ``worker_cls`` over ``input_vars`` collecting the ``value`` result.

    The bench is obtained from ``worker_cls().to_bench`` using the configuration
    from ``run_cfg_with(repeats)``; the sweep title combines ``name_prefix``, the
    worker class name and the repeat count. Plot callbacks are disabled.
    """
    cfg = run_cfg_with(repeats)
    bench = worker_cls().to_bench(cfg)
    sweep_title = f"{name_prefix}_{worker_cls.__name__}_{repeats}"
    sweep_kwargs = {
        "input_vars": input_vars,
        "result_vars": ["value"],
        "run_cfg": cfg,
        "plot_callbacks": False,
    }
    return bench.plot_sweep(sweep_title, **sweep_kwargs)
218 changes: 218 additions & 0 deletions test/test_band_result.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
"""Tests for bencher/results/holoview_results/band_result.py (BandResult)."""

import math
from types import SimpleNamespace

import holoviews as hv
import numpy as np
import pytest

import bencher as bn
from bencher.results.bench_result_base import ReduceType
from bencher.results.holoview_results.band_result import BandResult
from test.helpers import run_cfg_with, unwrap_hv


def plot_opts(overlay: hv.Overlay) -> dict:
    """Return the ``kwargs`` stored under the overlay's ``"plot"`` option group."""
    plot_group = overlay.opts.get("plot")
    return plot_group.kwargs


class BandBench(bn.ParametrizedSweep):
    """Smallest band fixture: one float input, one float result; repeats supply the samples."""

    size = bn.FloatSweep(default=50, bounds=[10, 100], samples=3, doc="Size")
    throughput = bn.ResultFloat(units="MB/s", doc="Throughput")

    def benchmark(self):
        """Set ``throughput`` to half of ``size`` plus ``sin(size)``."""
        linear_part = self.size * 0.5
        self.throughput = linear_part + math.sin(self.size)


class BandCatBench(bn.ParametrizedSweep):
    """Float input plus a categorical ``backend`` input.

    Used to check that the categorical dimension is aggregated into the band
    rather than appearing on the x-axis.
    """

    size = bn.FloatSweep(default=50, bounds=[10, 100], samples=3, doc="Size")
    backend = bn.StringSweep(["redis", "local"], doc="Backend")
    throughput = bn.ResultFloat(units="MB/s", doc="Throughput")

    def benchmark(self):
        """Scale ``size`` by a per-backend multiplier."""
        multipliers = {"redis": 1.0, "local": 2.0}
        factor = multipliers[self.backend]
        self.throughput = self.size * factor


class BandNanBench(bn.ParametrizedSweep):
    """Sweep that reports NaN whenever ``size`` is below 20."""

    size = bn.FloatSweep(default=50, bounds=[10, 100], samples=3, doc="Size")
    throughput = bn.ResultFloat(units="MB/s", doc="Throughput")

    def benchmark(self):
        """Set ``throughput`` to NaN for ``size < 20``, otherwise to half of ``size``."""
        if self.size < 20:
            self.throughput = float("nan")
        else:
            self.throughput = self.size * 0.5


class BandVecBench(bn.ParametrizedSweep):
    """Sweep with a two-component vector result, used to exercise the non-scalar rejection path."""

    size = bn.FloatSweep(default=50, bounds=[10, 100], samples=3, doc="Size")
    vec = bn.ResultVec(size=2, units="m", doc="Vector result")

    def benchmark(self):
        """Set ``vec`` to ``[size, 2 * size]``."""
        first = self.size
        second = self.size * 2
        self.vec = [first, second]


class BandTimeBench(bn.ParametrizedSweep):
    """Sweep whose result can be shifted between runs to build an over_time history."""

    size = bn.FloatSweep(default=50, bounds=[10, 100], samples=3, doc="Size")
    throughput = bn.ResultFloat(units="MB/s", doc="Throughput")

    # Plain attribute (not a sweep parameter); callers reassign it between snapshots.
    offset = 0.0

    def benchmark(self):
        """Set ``throughput`` to half of ``size`` shifted by the current ``offset``."""
        scaled = self.size * 0.5
        self.throughput = scaled + self.offset


@pytest.fixture(scope="module", name="res_1d")
def fixture_res_1d():
    """Module-scoped result of sweeping ``size`` on ``BandBench`` with five repeats."""
    cfg = run_cfg_with(repeats=5)
    sweep_kwargs = {
        "input_vars": ["size"],
        "result_vars": ["throughput"],
        "run_cfg": cfg,
    }
    return BandBench().to_bench(cfg).plot_sweep("band_1d", **sweep_kwargs)


@pytest.fixture(scope="module", name="res_cat")
def fixture_res_cat():
    """Module-scoped result of sweeping ``size`` and ``backend`` on ``BandCatBench`` with two repeats."""
    cfg = run_cfg_with(repeats=2)
    sweep_kwargs = {
        "input_vars": ["size", "backend"],
        "result_vars": ["throughput"],
        "run_cfg": cfg,
    }
    return BandCatBench().to_bench(cfg).plot_sweep("band_cat", **sweep_kwargs)


@pytest.fixture(scope="module", name="res_time")
def fixture_res_time():
    """Module-scoped result after three over_time snapshots of ``BandTimeBench``.

    Each snapshot raises the worker's ``offset`` by 1.0 and is tagged with its
    own ``time_src``; history is cleared only on the first snapshot. The result
    of the last sweep is returned.
    """
    worker = BandTimeBench()
    cfg = bn.BenchRunCfg(
        over_time=True,
        repeats=2,
        cache_results=False,
        cache_samples=False,
        auto_plot=False,
    )
    bench = worker.to_bench(cfg)
    sweep_kwargs = {
        "input_vars": ["size"],
        "result_vars": ["throughput"],
        "run_cfg": cfg,
    }
    latest = None
    for i in range(3):
        worker.offset = i * 1.0
        cfg.clear_cache = True
        # Only the first snapshot wipes earlier history so later ones accumulate.
        cfg.clear_history = i == 0
        stamp = f"2026-06-{10 + i:02d} snap{i:04d}"
        latest = bench.plot_sweep("band_time", time_src=stamp, **sweep_kwargs)
    return latest


class TestBandResult:
    """Checks on the overlay produced by ``to_band`` / ``to_band_ds``."""

    def test_to_band_overlay_composition(self, res_1d):
        """to_band yields two percentile Areas, a median Curve and a samples Scatter."""
        plot = res_1d.to_band()
        assert plot is not None
        overlay = unwrap_hv(plot)
        assert isinstance(overlay, hv.Overlay)
        # exact types: hv.Area is a subclass of hv.Curve, so isinstance would double count
        assert len([el for el in overlay if type(el) is hv.Area]) == 2
        assert len([el for el in overlay if type(el) is hv.Curve]) == 1
        assert len([el for el in overlay if type(el) is hv.Scatter]) == 1

    def test_band_labels_and_dims(self, res_1d):
        """Element labels and kdims/vdims reflect the input and result variables."""
        overlay = unwrap_hv(res_1d.to_band())
        labels = sorted(el.label for el in overlay)
        assert labels == sorted(["10th–90th pctl", "25th–75th pctl", "median", "samples"])
        for el in overlay:
            assert [d.name for d in el.kdims] == ["size"]
        outer = next(el for el in overlay if el.label == "10th–90th pctl")
        assert [d.name for d in outer.vdims] == ["throughput_p10", "throughput_p90"]
        median = next(el for el in overlay if el.label == "median")
        assert [d.name for d in median.vdims] == ["throughput"]

    def test_band_title_and_ylabel(self, res_1d):
        """Default title names var vs x-axis; ylabel includes the units."""
        overlay = unwrap_hv(res_1d.to_band())
        opts = plot_opts(overlay)
        assert opts["title"] == "throughput vs size (aggregated over repeat)"
        assert opts["ylabel"] == "throughput [MB/s]"

    def test_band_explicit_title_preserved(self, res_1d):
        """A title passed to to_band_ds is used verbatim instead of the default."""
        ds = res_1d.to_dataset(reduce=ReduceType.NONE)
        rv = res_1d.bench_cfg.result_vars[0]
        overlay = res_1d.to_band_ds(ds, rv, title="my custom title")
        assert plot_opts(overlay)["title"] == "my custom title"

    def test_band_enable_scatter_false(self, res_1d):
        """enable_scatter=False drops the samples Scatter layer but keeps the median Curve."""
        ds = res_1d.to_dataset(reduce=ReduceType.NONE)
        rv = res_1d.bench_cfg.result_vars[0]
        overlay = res_1d.to_band_ds(ds, rv, enable_scatter=False)
        assert not any(isinstance(el, hv.Scatter) for el in overlay)
        # Exact type check: the percentile Areas subclass hv.Curve, so an isinstance
        # test would pass even if the median Curve itself were missing.
        assert any(type(el) is hv.Curve for el in overlay)

    def test_band_categorical_flattened_into_samples(self, res_cat):
        """A categorical dim becomes part of the sample pool; the float stays on x."""
        overlay = unwrap_hv(res_cat.to_band())
        assert isinstance(overlay, hv.Overlay)
        for el in overlay:
            assert [d.name for d in el.kdims] == ["size"]
        assert plot_opts(overlay)["title"] == "throughput vs size (aggregated over backend)"

    def test_band_over_time_uses_time_axis(self, res_time):
        """With over_time history, the band x-axis is the over_time dimension."""
        ds = res_time.to_dataset(reduce=ReduceType.NONE)
        rv = res_time.bench_cfg.result_vars[0]
        overlay = res_time.to_band_ds(ds, rv)
        assert isinstance(overlay, hv.Overlay)
        for el in overlay:
            assert [d.name for d in el.kdims] == ["over_time"]
        assert plot_opts(overlay)["title"] == "throughput vs over_time (aggregated over size)"

    def test_band_suppressed_when_regression_overlay_exists(self, res_1d):
        """to_band_ds returns None when the regression overlay already shows the history."""
        ds = res_1d.to_dataset(reduce=ReduceType.NONE)
        rv = res_1d.bench_cfg.result_vars[0]
        original = res_1d.regression_report
        res_1d.regression_report = SimpleNamespace(
            results=[SimpleNamespace(variable="throughput", historical=[1.0, 2.0])]
        )
        try:
            assert res_1d.to_band_ds(ds, rv) is None
        finally:
            # res_1d is module-scoped, so restore it for the tests that follow.
            res_1d.regression_report = original

    def test_to_band_rejects_non_scalar_result(self):
        """A non-scalar (vector) result is outside SCALAR_RESULT_TYPES, so no band is drawn.

        BandResult's filter accepts any float/cat/repeat shape (repeats>=1 included),
        so the meaningful rejection path is the result type — a vector sweep must not
        silently produce a misleading band overlay.
        """
        run_cfg = run_cfg_with(repeats=3)
        bench = BandVecBench().to_bench(run_cfg)
        res = bench.plot_sweep(
            "band_vec", input_vars=["size"], result_vars=["vec"], run_cfg=run_cfg
        )
        result = res.to(BandResult, override=False)
        assert not isinstance(unwrap_hv(result), hv.Overlay)

    def test_band_nan_input_does_not_crash(self):
        """NaN results survive percentile computation and are masked out of the scatter."""
        run_cfg = run_cfg_with(repeats=3)
        bench = BandNanBench().to_bench(run_cfg)
        res = bench.plot_sweep(
            "band_nan", input_vars=["size"], result_vars=["throughput"], run_cfg=run_cfg
        )
        overlay = unwrap_hv(res.to_band())
        assert isinstance(overlay, hv.Overlay)
        scatter = next(el for el in overlay if isinstance(el, hv.Scatter))
        assert not np.isnan(scatter.dimension_values("throughput")).any()
Loading