# Runs the benchmark suite for a pull request on a self-hosted runner,
# compares the result against the PR's target branch and posts the
# comparison as a PR comment.
name: Benchmarks

on:
  pull_request:
    paths-ignore:
      - README.md
      - CONTRIBUTING.md
      - CITATION.cff
      - LICENSE
      - .readthedocs.yml
      - docs-img/**
      - docs/**
      - awkward-cpp/docs/**
      - studies/**

  workflow_dispatch:

concurrency:
  group: "benchmark-${{ github.head_ref || github.run_id }}"
  cancel-in-progress: true

jobs:
  run-benchmarks:
    name: Run Benchmarks

    runs-on: self-hosted

    env:
      PIP_ONLY_BINARY: numpy

    # Required for miniconda to activate conda
    defaults:
      run:
        shell: bash -l {0}

    steps:
      - name: Clean the workspace and mamba
        run: |
          rm -rf * .[!.]* || echo "Nothing to clean"
          rm -rf ~/micromamba* || echo "Nothing to clean"

      - uses: actions/checkout@v4
        with:
          submodules: true

      - name: Get micromamba
        uses: mamba-org/setup-micromamba@v2
        with:
          environment-name: test-env
          init-shell: bash
          create-args: >-
            python=3.13

      - name: Generate build files
        run: |
          pip install pipx
          pipx run nox -s prepare -- --headers --signatures --tests

      - name: Cache awkward-cpp wheel
        id: cache-awkward-cpp-wheel
        uses: actions/cache@v4
        with:
          path: awkward-cpp/dist
          key: ${{ github.job }}-${{ hashFiles('awkward-cpp/**') }}

      - name: Build awkward-cpp wheel
        if: steps.cache-awkward-cpp-wheel.outputs.cache-hit != 'true'
        run: |
          python -m pip install build
          python -m build -w awkward-cpp

      - name: Find built wheel
        uses: tj-actions/glob@v22
        id: find-wheel
        with:
          files: |
            awkward-cpp/dist/*.whl

      - name: Add workaround for 3.13 + cramjam
        run: echo 'PYO3_USE_ABI3_FORWARD_COMPATIBILITY=1' >> $GITHUB_ENV
        shell: bash

      - name: Install awkward, awkward-cpp, and dependencies
        run: python -m pip install -v . ${{ steps.find-wheel.outputs.paths }} pytest-github-actions-annotate-failures

      - name: Setup Benchmark Env
        run: python -m pip install -r benchmarks/requirements-benchmark.txt

      - name: Print versions
        run: python -m pip list

      - name: Run Benchmark and Comparisons
        id: benchmark_and_compare
        run: |
          cd benchmarks/
          ./run_action.sh
          # comparison.md is only written when a significant difference was
          # found; export it as a multi-line step output in that case.
          if [ -f "${BASE_OUTPUT_DIR}/comparison.md" ]; then
            {
              echo "comparison<<EOF"
              cat "${BASE_OUTPUT_DIR}/comparison.md"
              echo "EOF"
            } >> "$GITHUB_OUTPUT"
          fi
          cd ..
        env:
          TARGET_BRANCH: ${{ github.event.pull_request.base.ref }} # usually: main
          BRANCH_NAME: ${{ github.head_ref || github.ref_name }} # feature branch
          BASE_OUTPUT_DIR: results
        continue-on-error: true # failed benchmarking shouldn't stop the rest of the steps

      - name: Comment on PR
        uses: actions/github-script@v7
        # only for pull requests (a manual dispatch has no PR to comment on),
        # and only if there's a comparison.md; otherwise we won't post anything
        if: ${{ github.event_name == 'pull_request' && hashFiles('benchmarks/results/comparison.md') != '' }}
        with:
          script: |
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: process.env.COMPARISON
            })
        env:
          COMPARISON: ${{ steps.benchmark_and_compare.outputs.comparison }}

      - name: Cleanup Benchmark Outputs
        # always clean up: the runner is self-hosted, and a stale
        # comparison.md would otherwise be picked up by a later run
        if: always()
        run: rm -rf "benchmarks/${BASE_OUTPUT_DIR}"
        env:
          BASE_OUTPUT_DIR: results
def load_json(filepath):
    """Return the parsed JSON document stored in ``filepath``."""
    with open(filepath, encoding="utf-8") as f:
        return json.load(f)


def relative_difference(val1, val2):
    """Return ``|val1 - val2|`` relative to the smaller of both values.

    Equal values give ``0.0`` (also when both are zero). If only the smaller
    value is zero the difference is unbounded and ``inf`` is returned instead
    of raising ``ZeroDivisionError``.
    """
    if val1 == val2:
        return 0.0
    smaller = min(val1, val2)
    if smaller == 0:
        return float("inf")
    return abs(val1 - val2) / smaller


def _format_run_label(file_path):
    """Build the table column title for one benchmark result file.

    Result files written by ``run_action.sh`` are named
    ``<BASE_OUTPUT_DIR>/<branch>__<sha>.json``. Any other file name (e.g. when
    comparing two arbitrary JSON files by hand) is shown as is.
    """
    stem = os.fspath(file_path)
    if stem.endswith(".json"):
        stem = stem[: -len(".json")]  # remove file ending
    # remove base dir
    stem = stem.replace(os.getenv("BASE_OUTPUT_DIR", "results") + "/", "", 1)
    branch, separator, sha = stem.rpartition("__")
    if not separator:
        return f"file: `{stem}`"
    return f"branch: `{branch}` (sha: {sha})"


def compare_benchmarks(
    file1_path,
    file2_path,
    output_path,
    threshold=0.1,
):
    """Compare two google-benchmark JSON results and write a Markdown report.

    Benchmarks are matched by name; only those present in both files are
    compared. A benchmark is reported when the relative difference of its
    ``cpu_time`` exceeds ``threshold``. It is labelled an improvement when the
    second file is faster than the first one, and a regression otherwise.

    Args:
        file1_path: JSON result of the reference run.
        file2_path: JSON result of the run to judge.
        output_path: Markdown file to write. It is only written when at least
            one benchmark exceeds the threshold; missing parent directories
            are created.
        threshold: Minimal relative ``cpu_time`` difference to report.

    Raises:
        ValueError: If a reported benchmark uses different time units in the
            two files.
    """
    bm1 = {b["name"]: b for b in load_json(file1_path)["benchmarks"]}
    bm2 = {b["name"]: b for b in load_json(file2_path)["benchmarks"]}

    headers = [
        "Metric",
        _format_run_label(file1_path),
        _format_run_label(file2_path),
    ]
    eps = "elements_per_second"  # potential extra info

    output_lines = []

    for name, b1 in bm1.items():
        b2 = bm2.get(name)
        if b2 is None:
            continue

        cpu1 = b1["cpu_time"]
        cpu2 = b2["cpu_time"]
        rel_diff = relative_difference(cpu1, cpu2)

        # written as a negated ">" so that NaN is skipped as well
        if not rel_diff > threshold:
            continue

        time_unit = b1.get("time_unit", "")
        if time_unit != b2.get("time_unit", ""):
            raise ValueError(f"Can't compare different units for benchmark {name!r}")

        direction = "🟢 **Improvement**" if cpu2 < cpu1 else "🔴 **Regression**"
        diff_line = (
            f"**Relative CPU Time Difference:** `{rel_diff * 100:.1f}%` — {direction}"
        )

        output_lines.append(f"### 🔹 {name}\n")
        output_lines.append(diff_line + "\n")

        # Collapsed detailed comparison
        output_lines.append("<details><summary>Show full comparison</summary>\n\n")

        table = [
            "| " + " | ".join(headers) + " |",
            "| --- | --- | --- |",
            f"| `cpu_time` ({time_unit}) | {cpu1:.6e} | {cpu2:.6e} |",
            f"| `real_time` ({time_unit}) | {b1['real_time']:.6e} | {b2['real_time']:.6e} |",
        ]
        if eps in b1 and eps in b2:
            table.append(f"| `elements/s` (Hz) | {b1[eps]:.2e} | {b2[eps]:.2e} |")

        output_lines.extend(table)
        output_lines.append("\n</details>\n")

    if not output_lines:
        print(  # noqa: T201
            f"No significant differences (over {threshold * 100:.1f}%) in cpu_time found."
        )
        return

    markdown_output = "\n".join(["## Benchmarks", *output_lines])

    # Print to terminal
    print(markdown_output)  # noqa: T201

    # Save to file (the report contains emoji, so the encoding is explicit)
    pathlib.Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(markdown_output + "\n")

    print(f"\n✅ Detailed Markdown saved to `{output_path}`")  # noqa: T201
def _general_reducer_benchmark(state, **kwargs):
    """Measure one reducer on one generated array.

    ``kwargs`` is a test case with the entries ``mkarr``, ``length``,
    ``dtype``, ``reducer`` and ``axis``. The array is built once via
    ``mkarr(length, dtype)``; ``reducer(array, axis=axis)`` is then called
    for as long as ``state`` evaluates to true. Afterwards an
    ``elements_per_second`` counter is attached to ``state``.
    """
    make_array, n_items, item_dtype, reduce_fn, reduce_axis = (
        kwargs[key] for key in ("mkarr", "length", "dtype", "reducer", "axis")
    )

    # the input is created before the loop, so only the reducer call repeats
    sample = make_array(n_items, item_dtype)

    # measured section
    while state:
        reduce_fn(sample, axis=reduce_axis)

    # total number of processed elements, stored with the kIsRate flag
    processed = n_items * state.iterations
    state.counters["elements_per_second"] = google_benchmark.Counter(
        processed, google_benchmark.Counter.kIsRate
    )
ak.all, + ak.any, + ak.argmax, + ak.argmin, + ak.max, + ak.mean, + ak.min, + ak.prod, + ak.std, + ak.sum, + ak.var, +] + +# register as benchmarks +for reducer in REDUCERS: + + @benchmark(_prepare_reducer_benchmark(reducer=reducer)) + def _(state, **kwargs): + _general_reducer_benchmark(state, **kwargs) + + +if __name__ == "__main__": + google_benchmark.main() diff --git a/benchmarks/requirements-benchmark.txt b/benchmarks/requirements-benchmark.txt new file mode 100644 index 0000000000..1c47cea3a0 --- /dev/null +++ b/benchmarks/requirements-benchmark.txt @@ -0,0 +1 @@ +google-benchmark diff --git a/benchmarks/run_action.sh b/benchmarks/run_action.sh new file mode 100755 index 0000000000..7131f2e8ed --- /dev/null +++ b/benchmarks/run_action.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash + +action() { + # This is for the HEAD@PR (including main merged) + + # setup output dir + local orig_git_hash + orig_git_hash=$(git rev-parse --verify HEAD) + local output_path_feature=${BASE_OUTPUT_DIR}/${BRANCH_NAME}__${orig_git_hash}.json + + # Temporarily merge the target branch + git checkout -b pr_branch + git fetch --unshallow || echo "" # It might be worth switching actions/checkout to use depth 0 later on + git config user.email "gha@example.com" && git config user.name "GHA" # For some reason this is needed even though nothing is being committed + # shellcheck disable=SC2028 + git merge --no-commit --no-ff origin/"${TARGET_BRANCH}" || (echo "***\nError: There are merge conflicts that need to be resolved.\n***" && false) + + # create + mkdir -p "$(dirname "${output_path_feature}")" + + python benchmark.py \ + --benchmark_time_unit=ms \ + --benchmark_out="${output_path_feature}" \ + --benchmark_out_format=json + + + # This is for HEAD@main (usually main, not necessarily though) + git stash + git checkout origin/"${TARGET_BRANCH}" + + local target_git_hash + target_git_hash=$(git rev-parse --verify HEAD) + local 
# reproducible rng
def rng():
    """Return a new NumPy random generator seeded with 42."""
    return np.random.default_rng(seed=42)


def benchmark(*args):
    """Return a decorator that registers one benchmark per test case.

    ``args[0]`` is an iterable of test-case dicts. Every dict needs a
    ``"name"`` entry, which becomes the registered benchmark name, and is
    passed as keyword arguments to the decorated function.

    The decorator returns the wrapper of the last test case, or the
    undecorated function when there are no test cases.
    """

    def decorator(func):
        registered = func
        for test_case in args[0]:

            # `test_case=test_case` binds the current dict at definition time;
            # a plain closure would see only the last one.
            @google_benchmark.register(name=test_case["name"])
            @functools.wraps(func)
            def wrapper(state, test_case=test_case):
                return func(state, **test_case)

            registered = wrapper

        return registered

    return decorator


def _generate_counts(sum_upto: int, how_many: int) -> np.ndarray:
    """Return ``how_many`` integer counts that add up to ``sum_upto``."""
    # NOTE: both rng() calls start from the same seeded state; kept that way
    # so that the generated benchmark inputs do not change.
    weights = rng().dirichlet(np.ones(how_many) * 0.3)
    counts = rng().multinomial(sum_upto, weights).astype("int")
    assert np.sum(counts) == sum_upto
    return counts


def Jagged(length, dtype):
    """creates a singly jagged array

    Raises:
        ValueError: If ``length`` is too small to be split into sublists.
    """
    n_total = int(length)
    if n_total < 1:
        raise ValueError(f"length must be positive, got {length}")

    # seems like a reasonable heuristic
    # bit_length gives the exact floor(log2(n)), unlike a ratio of float logs
    powof2 = n_total.bit_length() - 1
    how_many = (1 << (powof2 // 2)) * 10

    if how_many >= n_total:
        raise ValueError(
            f"length={length} is too small to be split into {how_many} sublists"
        )

    flat_content = rng().random(length, dtype)
    counts = _generate_counts(length, how_many)
    return ak.unflatten(flat_content, counts)


def Flat(length, dtype):
    """creates a flat array"""
    return ak.Array(rng().random(length, dtype))


def format_benchmark_name(params: dict) -> str:
    """Build a readable benchmark name from a parameter dict.

    ``op_name``, ``array``, ``length`` and ``dtype`` form the base name, e.g.
    ``ak.sum(Jagged<f64[4096]>``; missing entries are shown as ``??``. Any
    other entry is appended as ``, key=value``. ``params`` is not modified.
    """
    extras = dict(params)  # copy: the pops below must not empty the caller's dict
    op_name = extras.pop("op_name", "??")
    array = extras.pop("array", "??")
    length = extras.pop("length", "??")
    dtype_short = (
        str(extras.pop("dtype", "??"))  # str() also accepts dtype objects
        .replace("float", "f")
        .replace("int", "i")
        .replace("complex", "c")
    )

    # any extra parameters to the function, e.g. `axis=0`
    extra_args = "".join(f", {k}={v}" for k, v in extras.items())

    return f"ak.{op_name}({array}<{dtype_short}[{length}]>{extra_args})"