From f0305a09c08749eb65bcd20d4b8ba74a23ccb479 Mon Sep 17 00:00:00 2001 From: pfackeldey Date: Mon, 16 Jun 2025 13:20:58 -0400 Subject: [PATCH 1/9] try running benchmarks for awkward --- .github/workflows/benchmark.yml | 123 +++++++++++++++++++++++ benchmarks/README.md | 16 +++ benchmarks/__init__.py | 0 benchmarks/benchmark.py | 9 ++ benchmarks/compare.py | 135 ++++++++++++++++++++++++++ benchmarks/misc_benchmark.py | 65 +++++++++++++ benchmarks/reducer_benchmark.py | 70 +++++++++++++ benchmarks/requirements-benchmark.txt | 1 + benchmarks/run_action.sh | 49 ++++++++++ benchmarks/run_local.sh | 19 ++++ benchmarks/util.py | 51 ++++++++++ 11 files changed, 538 insertions(+) create mode 100644 .github/workflows/benchmark.yml create mode 100644 benchmarks/README.md create mode 100644 benchmarks/__init__.py create mode 100644 benchmarks/benchmark.py create mode 100644 benchmarks/compare.py create mode 100644 benchmarks/misc_benchmark.py create mode 100644 benchmarks/reducer_benchmark.py create mode 100644 benchmarks/requirements-benchmark.txt create mode 100755 benchmarks/run_action.sh create mode 100755 benchmarks/run_local.sh create mode 100644 benchmarks/util.py diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 0000000000..d7e8f65c78 --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,123 @@ +name: Benchmarks + +on: + pull_request: + paths-ignore: + - README.md + - CONTRIBUTING.md + - CITATION.cff + - LICENSE + - .readthedocs.yml + - docs-img/** + - docs/** + - awkward-cpp/docs/** + - studies/** + + workflow_dispatch: + +concurrency: + group: "benchmark-${{ github.head_ref || github.run_id }}" + cancel-in-progress: true + +jobs: + run-tests: + name: Run Benchmarks + strategy: + fail-fast: false + matrix: + runs-on: + - ubuntu-latest + python-version: + - "3.13" + python-architecture: + - x64 + + runs-on: ${{ matrix.runs-on }} + + env: + PIP_ONLY_BINARY: numpy + + steps: + - uses: actions/checkout@v4 + 
with: + submodules: true + + - name: "Python ${{ matrix.python-version }}" + uses: actions/setup-python@v5 + with: + python-version: "${{ matrix.python-version }}" + architecture: "${{ matrix.python-architecture }}" + allow-prereleases: true + + - name: Generate build files + run: pipx run nox -s prepare -- --headers --signatures --tests + + - name: Cache awkward-cpp wheel + id: cache-awkward-cpp-wheel + uses: actions/cache@v4 + with: + path: awkward-cpp/dist + key: ${{ github.job }}-${{ matrix.runs-on }}-${{ matrix.python-version }}-${{ matrix.python-architecture }}-${{ hashFiles('awkward-cpp/**') }} + + - name: Build awkward-cpp wheel + if: steps.cache-awkward-cpp-wheel.outputs.cache-hit != 'true' + run: | + python -m pip install build + python -m build -w awkward-cpp + + - name: Find built wheel + uses: tj-actions/glob@v22 + id: find-wheel + with: + files: | + awkward-cpp/dist/*.whl + + - name: Install awkward, awkward-cpp, and dependencies + run: python -m pip install -v . ${{ steps.find-wheel.outputs.paths }} pytest-github-actions-annotate-failures + + - name: Setup Benchmark Env + run: python -m pip install -r benchmarks/requirements-benchmark.txt + + - name: Print versions + run: python -m pip list + + - name: Get PR target branch + id: get_target_branch + uses: actions/github-script@v7 + with: + result-encoding: string + script: | + const { data: pullRequest } = await github.rest.pulls.get({ + owner: context.repo.owner, + repo: "awkward", + pull_number: "${{ inputs.pr-number }}", + }); + return pullRequest.base.ref; + + - name: Run Benchmark and Comparisons + id: benchmark_and_compare + shell: bash + run: | + cd benchmarks/ + ./run_action.sh + echo "comparison='$(cat BASE_OUTPUT_DIR)/comparison.md'" >> $GITHUB_OUTPUT + env: + TARGET_BRANCH: ${{ steps.get_target_branch.outputs.result }} # usually: main + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} # feature branch + BASE_OUTPUT_DIR: results_PR${{ inputs.pr-number }} + continue-on-error: true # 
failed benchmarking shouldn't stop the rest of the steps + + - name: Comment on PR + uses: actions/github-script@v7 + if: ${{ hashFiles(format('{0}/comparison.md', ${{ steps.benchmark_and_compare.env.BASE_OUTPUT_DIR }})) != '' }} # if there's no comparison.md, we won't post anything + with: + script: | + github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: `${{ steps.benchmark_and_compare.outputs.comparison }}` + }) + + - name: Cleanup Benchmark Outputs + run: rm -r ${{ steps.benchmark_and_compare.env.BASE_OUTPUT_DIR }} diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 0000000000..d08d1698cc --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,16 @@ +# Benchmarks + +Setup env: +``` +pip install -r requirements-benchmark.txt +``` + +Run with: +```shell +./run_local.sh +``` + +If you have 2 benchmark results to compare: +```shell +python compare file1.json file2.json +``` diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py new file mode 100644 index 0000000000..586932f284 --- /dev/null +++ b/benchmarks/benchmark.py @@ -0,0 +1,9 @@ +import google_benchmark + +# explicit imports to register all benchmarks +import reducer_benchmark # noqa +import misc_benchmark # noqa + + +if __name__ == "__main__": + google_benchmark.main() diff --git a/benchmarks/compare.py b/benchmarks/compare.py new file mode 100644 index 0000000000..ffa588775e --- /dev/null +++ b/benchmarks/compare.py @@ -0,0 +1,135 @@ +import json +import sys +import os +import pathlib + + +def load_json(filepath): + with open(filepath, "r") as f: + return json.load(f) + + +def relative_difference(val1, val2): + return abs(val1 - val2) / min(val1, val2) + + +def format_benchmark_name(name: str) -> str: + try: + parts = name.split("/") + base = parts[0] + params = {k: v for k, v 
in (part.split("=", 1) for part in parts[1:])} + + array = params.pop("array", "??") + length = params.pop("length", "??") + dtype = params.pop("dtype", "").replace("'", "") + dtype_short = { + "float64": "f64", + "float32": "f32", + "int64": "i64", + "int32": "i32", + }.get(dtype, dtype) + + pretty_name = f"{base}({array}<{length},{dtype_short}>" + + # any extra parameters to the function, e.g. `axis=0` + for k,v in params.items(): + pretty_name += f", {k}={v}" + + pretty_name += ")" + return pretty_name + except Exception: + return name # fallback + + +def compare_benchmarks( + file1_path, + file2_path, + output_path, + threshold=0.1, +): + data1 = load_json(file1_path) + data2 = load_json(file2_path) + + bm1 = {b["name"]: b for b in data1["benchmarks"]} + bm2 = {b["name"]: b for b in data2["benchmarks"]} + + file1_short = os.path.basename(file1_path) + file2_short = os.path.basename(file2_path) + + output_lines = [] + + found_diffs = False + + for name in bm1: + if name in bm2: + b1 = bm1[name] + b2 = bm2[name] + + cpu1 = b1["cpu_time"] + cpu2 = b2["cpu_time"] + rel_diff = relative_difference(cpu1, cpu2) + + if rel_diff > threshold: + found_diffs = True + display_name = format_benchmark_name(name) + + direction = "🟢 **Improvement**" if cpu2 < cpu1 else "šŸ”“ **Regression**" + diff_line = f"**Relative CPU Time Difference:** `{rel_diff * 100:.1f}%` — {direction}" + + output_lines.append(f"### šŸ”¹ {display_name}\n") + output_lines.append(diff_line + "\n") + + # Collapsed detailed comparison + output_lines.append( + "
Show full comparison\n\n" + ) + + headers = ["Metric", f"{file1_short}", f"{file2_short}"] + time_unit = b1.get("time_unit", "") + assert time_unit == b2.get("time_unit", ""), ( + "Can't compare difference units" + ) + + table = [ + "| " + " | ".join(headers) + " |", + "| --- | --- | --- |", + f"| `cpu_time` ({time_unit}) | {cpu1:.6e} | {cpu2:.6e} |", + f"| `real_time` ({time_unit}) | {b1['real_time']:.6e} | {b2['real_time']:.6e} |", + ] + eps = "elements_per_second" # potential extra info + if eps in b1 and eps in b2: + table += [f"| `elements/s` (Hz) | {b1[eps]:.2e} | {b2[eps]:.2e} |"] + + output_lines.extend(table) + output_lines.append("\n
\n") + + if not found_diffs: + print(f"No significant differences (over {threshold * 100:.1f}%) in cpu_time found.") + return + + header = ["## Benchmarks"] + markdown_output = "\n".join(header + output_lines) + + # Print to terminal + print(markdown_output) + + # Save to file + with open(output_file, "w") as f: + f.write(markdown_output + "\n") + + print(f"\nāœ… Detailed Markdown saved to `{output_file}`") + + +if __name__ == "__main__": + if len(sys.argv) != 3: + print(f"Usage: python {os.path.basename(__file__)} file1.json file2.json") + sys.exit(1) + + file1 = sys.argv[1] + file2 = sys.argv[2] + + output_file = str( + pathlib.Path(os.getenv("BASE_OUTPUT_DIR", "results")) + / pathlib.Path("comparison.md") + ) + compare_benchmarks(file1, file2, output_file) diff --git a/benchmarks/misc_benchmark.py b/benchmarks/misc_benchmark.py new file mode 100644 index 0000000000..e038ea32ee --- /dev/null +++ b/benchmarks/misc_benchmark.py @@ -0,0 +1,65 @@ +import awkward as ak +import google_benchmark + +from util import benchmark, Jagged, Flat + + +def _prepare_fun_benchmark(fun): + return [ + { + "name": f"ak.{fun.__name__}/array={mkarr.__name__}/{length=}/{dtype=}", + "mkarr": mkarr, + "length": length, + "dtype": dtype, + "fun": fun, + } + for mkarr in (Jagged, Flat) + for length in [1 << i for i in (12, 16, 20)] + for dtype in ["float64"] + ] + + +def _general_fun_benchmark(state, **kwargs): + mkarr = kwargs["mkarr"] + length = kwargs["length"] + dtype = kwargs["dtype"] + fun = kwargs["fun"] + + # create singely jagged awkward array + ak_array = mkarr(length, dtype) + + # run measurement + while state: + fun(ak_array) + + # track how many elements per second are processed + state.counters["elements_per_second"] = google_benchmark.Counter( + length * state.iterations, google_benchmark.Counter.kIsRate + ) + + +# extend for more misc funs +# some of them don't make much sense to benchmark I suppose... 
+FUNS = [ + ak.angle, + ak.drop_none, + ak.imag, + ak.is_none, + ak.is_valid, + ak.nan_to_none, + ak.nan_to_num, + ak.real, + ak.round, + ak.validity_error, +] + +# register as benchmarks +for fun in FUNS: + + @benchmark(_prepare_fun_benchmark(fun=fun)) + def _(state, **kwargs): + _general_fun_benchmark(state, **kwargs) + + +if __name__ == "__main__": + google_benchmark.main() diff --git a/benchmarks/reducer_benchmark.py b/benchmarks/reducer_benchmark.py new file mode 100644 index 0000000000..d2b557fce6 --- /dev/null +++ b/benchmarks/reducer_benchmark.py @@ -0,0 +1,70 @@ +import awkward as ak +import google_benchmark + +from util import benchmark, Jagged, Flat +import time + + +def _prepare_reducer_benchmark(reducer): + return [ + { + "name": f"ak.{reducer.__name__}/array={mkarr.__name__}/{length=}/{dtype=}/{axis=}", + "mkarr": mkarr, + "length": length, + "dtype": dtype, + "reducer": reducer, + "axis": axis, + } + for mkarr in (Jagged, Flat) + for length in [1 << i for i in (12, 16, 20)] + for dtype in ["float64"] + for axis in ([None, 0, 1] if mkarr is Jagged else [None]) + ] + + +def _general_reducer_benchmark(state, **kwargs): + mkarr = kwargs["mkarr"] + length = kwargs["length"] + dtype = kwargs["dtype"] + reducer = kwargs["reducer"] + axis = kwargs["axis"] + + # create singely jagged awkward array + ak_array = mkarr(length, dtype) + + # run measurement + while state: + time.sleep(0.1) + reducer(ak_array, axis=axis) + + # track how many elements per second are processed + state.counters["elements_per_second"] = google_benchmark.Counter( + length * state.iterations, google_benchmark.Counter.kIsRate + ) + + +# extend for more reducers +REDUCERS = [ + ak.all, + ak.any, + ak.argmax, + ak.argmin, + ak.max, + ak.mean, + ak.min, + ak.prod, + ak.std, + ak.sum, + ak.var, +] + +# register as benchmarks +for reducer in REDUCERS: + + @benchmark(_prepare_reducer_benchmark(reducer=reducer)) + def _(state, **kwargs): + _general_reducer_benchmark(state, **kwargs) + + +if 
__name__ == "__main__": + google_benchmark.main() diff --git a/benchmarks/requirements-benchmark.txt b/benchmarks/requirements-benchmark.txt new file mode 100644 index 0000000000..1c47cea3a0 --- /dev/null +++ b/benchmarks/requirements-benchmark.txt @@ -0,0 +1 @@ +google-benchmark diff --git a/benchmarks/run_action.sh b/benchmarks/run_action.sh new file mode 100755 index 0000000000..07340addbc --- /dev/null +++ b/benchmarks/run_action.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash + +action() { + # This is for the HEAD@PR (including main merged) + + # setup output dir + local current_git_hash=$(git rev-parse --verify HEAD) + local results_dir=${BASE_OUTPUT_DIR}/${BRANCH_NAME}__${current_git_hash} + local output_path_feature=${results_dir}/${bm_script}.json + + # Temporarily merge the target branch + git checkout -b pr_branch + git fetch --unshallow || echo "" # It might be worth switching actions/checkout to use depth 0 later on + git config user.email "gha@example.com" && git config user.name "GHA" # For some reason this is needed even though nothing is being committed + git merge --no-commit --no-ff origin/${TARGET_BRANCH} || (echo "***\nError: There are merge conflicts that need to be resolved.\n***" && false) + + # create + mkdir -p $results_dir + + local bm_script="benchmark.py" + + python $bm_script \ + --benchmark_time_unit=ms \ + --benchmark_out=${output_path_feature} \ + --benchmark_out_format=json + + + # This is for HEAD@main (usually main, not necessarily though) + git stash + git checkout origin/${TARGET_BRANCH} + + local current_git_hash=$(git rev-parse --verify HEAD) + local results_dir=${BASE_OUTPUT_DIR}/${TARGET_BRANCH}__${current_git_hash} + local output_path_target=${results_dir}/${bm_script}.json + + # create + mkdir -p $results_dir + + local bm_script="benchmark.py" + + python $bm_script \ + --benchmark_time_unit=ms \ + --benchmark_out=${output_path_target} \ + --benchmark_out_format=json + + # Compare both + python compare.py ${output_path_target} 
${output_path_feature} +} +action "$@" diff --git a/benchmarks/run_local.sh b/benchmarks/run_local.sh new file mode 100755 index 0000000000..f797094f15 --- /dev/null +++ b/benchmarks/run_local.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +action() { + # setup output dir + local current_git_hash=$(git rev-parse --verify HEAD) + local results_dir=results/${current_git_hash} + + # create + mkdir -p $results_dir + + local bm_script="benchmark.py" + + python $bm_script \ + --benchmark_time_unit=ms \ + --benchmark_color=true \ + --benchmark_out=$results_dir/$bm_script.json \ + --benchmark_out_format=json +} +action "$@" diff --git a/benchmarks/util.py b/benchmarks/util.py new file mode 100644 index 0000000000..963b5e612d --- /dev/null +++ b/benchmarks/util.py @@ -0,0 +1,51 @@ +import functools +import math +import awkward as ak +import numpy as np +import google_benchmark + + +# reproducible rng +def rng(): + return np.random.default_rng(seed=42) + + +def benchmark(*args): + def decorator(func): + for test_case in args[0]: + + @google_benchmark.register(name=test_case["name"]) + @functools.wraps(func) + def wrapper(state, test_case=test_case): + return func(state, **test_case) + + return wrapper + + return decorator + + +def _generate_counts(sum_upto: int, how_many: int) -> np.ndarray: + counts = rng().multinomial( + sum_upto, rng().dirichlet(np.ones(how_many) * 0.3) + ).astype("int") + assert np.sum(counts) == sum_upto + return counts + + +def Jagged(length, dtype): + """creates a singely jagged array""" + flat_content = rng().random(length, dtype) + + # seems like a reasonable heuristic + powof2 = int(math.log(length) / math.log(2)) + how_many = (1 << (powof2 // 2)) * 10 + + assert how_many < length + + counts = _generate_counts(length, how_many) + return ak.unflatten(flat_content, counts) + + +def Flat(length, dtype): + """creates a flat array""" + return ak.Array(rng().random(length, dtype)) From 5ce99ec7171f96492d0ca9cd79ac978c8af09aac Mon Sep 17 00:00:00 2001 From: 
pfackeldey Date: Mon, 16 Jun 2025 13:20:58 -0400 Subject: [PATCH 2/9] try running benchmarks for awkward --- .github/workflows/benchmark.yml | 123 +++++++++++++++++++++++ benchmarks/README.md | 16 +++ benchmarks/__init__.py | 0 benchmarks/benchmark.py | 9 ++ benchmarks/compare.py | 135 ++++++++++++++++++++++++++ benchmarks/misc_benchmark.py | 65 +++++++++++++ benchmarks/reducer_benchmark.py | 68 +++++++++++++ benchmarks/requirements-benchmark.txt | 1 + benchmarks/run_action.sh | 49 ++++++++++ benchmarks/run_local.sh | 19 ++++ benchmarks/util.py | 51 ++++++++++ 11 files changed, 536 insertions(+) create mode 100644 .github/workflows/benchmark.yml create mode 100644 benchmarks/README.md create mode 100644 benchmarks/__init__.py create mode 100644 benchmarks/benchmark.py create mode 100644 benchmarks/compare.py create mode 100644 benchmarks/misc_benchmark.py create mode 100644 benchmarks/reducer_benchmark.py create mode 100644 benchmarks/requirements-benchmark.txt create mode 100755 benchmarks/run_action.sh create mode 100755 benchmarks/run_local.sh create mode 100644 benchmarks/util.py diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 0000000000..d7e8f65c78 --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,123 @@ +name: Benchmarks + +on: + pull_request: + paths-ignore: + - README.md + - CONTRIBUTING.md + - CITATION.cff + - LICENSE + - .readthedocs.yml + - docs-img/** + - docs/** + - awkward-cpp/docs/** + - studies/** + + workflow_dispatch: + +concurrency: + group: "benchmark-${{ github.head_ref || github.run_id }}" + cancel-in-progress: true + +jobs: + run-tests: + name: Run Benchmarks + strategy: + fail-fast: false + matrix: + runs-on: + - ubuntu-latest + python-version: + - "3.13" + python-architecture: + - x64 + + runs-on: ${{ matrix.runs-on }} + + env: + PIP_ONLY_BINARY: numpy + + steps: + - uses: actions/checkout@v4 + with: + submodules: true + + - name: "Python ${{ matrix.python-version }}" + 
uses: actions/setup-python@v5 + with: + python-version: "${{ matrix.python-version }}" + architecture: "${{ matrix.python-architecture }}" + allow-prereleases: true + + - name: Generate build files + run: pipx run nox -s prepare -- --headers --signatures --tests + + - name: Cache awkward-cpp wheel + id: cache-awkward-cpp-wheel + uses: actions/cache@v4 + with: + path: awkward-cpp/dist + key: ${{ github.job }}-${{ matrix.runs-on }}-${{ matrix.python-version }}-${{ matrix.python-architecture }}-${{ hashFiles('awkward-cpp/**') }} + + - name: Build awkward-cpp wheel + if: steps.cache-awkward-cpp-wheel.outputs.cache-hit != 'true' + run: | + python -m pip install build + python -m build -w awkward-cpp + + - name: Find built wheel + uses: tj-actions/glob@v22 + id: find-wheel + with: + files: | + awkward-cpp/dist/*.whl + + - name: Install awkward, awkward-cpp, and dependencies + run: python -m pip install -v . ${{ steps.find-wheel.outputs.paths }} pytest-github-actions-annotate-failures + + - name: Setup Benchmark Env + run: python -m pip install -r benchmarks/requirements-benchmark.txt + + - name: Print versions + run: python -m pip list + + - name: Get PR target branch + id: get_target_branch + uses: actions/github-script@v7 + with: + result-encoding: string + script: | + const { data: pullRequest } = await github.rest.pulls.get({ + owner: context.repo.owner, + repo: "awkward", + pull_number: "${{ inputs.pr-number }}", + }); + return pullRequest.base.ref; + + - name: Run Benchmark and Comparisons + id: benchmark_and_compare + shell: bash + run: | + cd benchmarks/ + ./run_action.sh + echo "comparison='$(cat BASE_OUTPUT_DIR)/comparison.md'" >> $GITHUB_OUTPUT + env: + TARGET_BRANCH: ${{ steps.get_target_branch.outputs.result }} # usually: main + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} # feature branch + BASE_OUTPUT_DIR: results_PR${{ inputs.pr-number }} + continue-on-error: true # failed benchmarking shouldn't stop the rest of the steps + + - name: Comment on 
PR + uses: actions/github-script@v7 + if: ${{ hashFiles(format('{0}/comparison.md', ${{ steps.benchmark_and_compare.env.BASE_OUTPUT_DIR }})) != '' }} # if there's no comparison.md, we won't post anything + with: + script: | + github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: `${{ steps.benchmark_and_compare.outputs.comparison }}` + }) + + - name: Cleanup Benchmark Outputs + run: rm -r ${{ steps.benchmark_and_compare.env.BASE_OUTPUT_DIR }} diff --git a/benchmarks/README.md b/benchmarks/README.md new file mode 100644 index 0000000000..d08d1698cc --- /dev/null +++ b/benchmarks/README.md @@ -0,0 +1,16 @@ +# Benchmarks + +Setup env: +``` +pip install -r requirements-benchmark.txt +``` + +Run with: +```shell +./run_local.sh +``` + +If you have 2 benchmark results to compare: +```shell +python compare file1.json file2.json +``` diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py new file mode 100644 index 0000000000..586932f284 --- /dev/null +++ b/benchmarks/benchmark.py @@ -0,0 +1,9 @@ +import google_benchmark + +# explicit imports to register all benchmarks +import reducer_benchmark # noqa +import misc_benchmark # noqa + + +if __name__ == "__main__": + google_benchmark.main() diff --git a/benchmarks/compare.py b/benchmarks/compare.py new file mode 100644 index 0000000000..ffa588775e --- /dev/null +++ b/benchmarks/compare.py @@ -0,0 +1,135 @@ +import json +import sys +import os +import pathlib + + +def load_json(filepath): + with open(filepath, "r") as f: + return json.load(f) + + +def relative_difference(val1, val2): + return abs(val1 - val2) / min(val1, val2) + + +def format_benchmark_name(name: str) -> str: + try: + parts = name.split("/") + base = parts[0] + params = {k: v for k, v in (part.split("=", 1) for part in parts[1:])} + + array = params.pop("array", 
"??") + length = params.pop("length", "??") + dtype = params.pop("dtype", "").replace("'", "") + dtype_short = { + "float64": "f64", + "float32": "f32", + "int64": "i64", + "int32": "i32", + }.get(dtype, dtype) + + pretty_name = f"{base}({array}<{length},{dtype_short}>" + + # any extra parameters to the function, e.g. `axis=0` + for k,v in params.items(): + pretty_name += f", {k}={v}" + + pretty_name += ")" + return pretty_name + except Exception: + return name # fallback + + +def compare_benchmarks( + file1_path, + file2_path, + output_path, + threshold=0.1, +): + data1 = load_json(file1_path) + data2 = load_json(file2_path) + + bm1 = {b["name"]: b for b in data1["benchmarks"]} + bm2 = {b["name"]: b for b in data2["benchmarks"]} + + file1_short = os.path.basename(file1_path) + file2_short = os.path.basename(file2_path) + + output_lines = [] + + found_diffs = False + + for name in bm1: + if name in bm2: + b1 = bm1[name] + b2 = bm2[name] + + cpu1 = b1["cpu_time"] + cpu2 = b2["cpu_time"] + rel_diff = relative_difference(cpu1, cpu2) + + if rel_diff > threshold: + found_diffs = True + display_name = format_benchmark_name(name) + + direction = "🟢 **Improvement**" if cpu2 < cpu1 else "šŸ”“ **Regression**" + diff_line = f"**Relative CPU Time Difference:** `{rel_diff * 100:.1f}%` — {direction}" + + output_lines.append(f"### šŸ”¹ {display_name}\n") + output_lines.append(diff_line + "\n") + + # Collapsed detailed comparison + output_lines.append( + "
Show full comparison\n\n" + ) + + headers = ["Metric", f"{file1_short}", f"{file2_short}"] + time_unit = b1.get("time_unit", "") + assert time_unit == b2.get("time_unit", ""), ( + "Can't compare difference units" + ) + + table = [ + "| " + " | ".join(headers) + " |", + "| --- | --- | --- |", + f"| `cpu_time` ({time_unit}) | {cpu1:.6e} | {cpu2:.6e} |", + f"| `real_time` ({time_unit}) | {b1['real_time']:.6e} | {b2['real_time']:.6e} |", + ] + eps = "elements_per_second" # potential extra info + if eps in b1 and eps in b2: + table += [f"| `elements/s` (Hz) | {b1[eps]:.2e} | {b2[eps]:.2e} |"] + + output_lines.extend(table) + output_lines.append("\n
\n") + + if not found_diffs: + print(f"No significant differences (over {threshold * 100:.1f}%) in cpu_time found.") + return + + header = ["## Benchmarks"] + markdown_output = "\n".join(header + output_lines) + + # Print to terminal + print(markdown_output) + + # Save to file + with open(output_file, "w") as f: + f.write(markdown_output + "\n") + + print(f"\nāœ… Detailed Markdown saved to `{output_file}`") + + +if __name__ == "__main__": + if len(sys.argv) != 3: + print(f"Usage: python {os.path.basename(__file__)} file1.json file2.json") + sys.exit(1) + + file1 = sys.argv[1] + file2 = sys.argv[2] + + output_file = str( + pathlib.Path(os.getenv("BASE_OUTPUT_DIR", "results")) + / pathlib.Path("comparison.md") + ) + compare_benchmarks(file1, file2, output_file) diff --git a/benchmarks/misc_benchmark.py b/benchmarks/misc_benchmark.py new file mode 100644 index 0000000000..e038ea32ee --- /dev/null +++ b/benchmarks/misc_benchmark.py @@ -0,0 +1,65 @@ +import awkward as ak +import google_benchmark + +from util import benchmark, Jagged, Flat + + +def _prepare_fun_benchmark(fun): + return [ + { + "name": f"ak.{fun.__name__}/array={mkarr.__name__}/{length=}/{dtype=}", + "mkarr": mkarr, + "length": length, + "dtype": dtype, + "fun": fun, + } + for mkarr in (Jagged, Flat) + for length in [1 << i for i in (12, 16, 20)] + for dtype in ["float64"] + ] + + +def _general_fun_benchmark(state, **kwargs): + mkarr = kwargs["mkarr"] + length = kwargs["length"] + dtype = kwargs["dtype"] + fun = kwargs["fun"] + + # create singely jagged awkward array + ak_array = mkarr(length, dtype) + + # run measurement + while state: + fun(ak_array) + + # track how many elements per second are processed + state.counters["elements_per_second"] = google_benchmark.Counter( + length * state.iterations, google_benchmark.Counter.kIsRate + ) + + +# extend for more misc funs +# some of them don't make much sense to benchmark I suppose... 
+FUNS = [ + ak.angle, + ak.drop_none, + ak.imag, + ak.is_none, + ak.is_valid, + ak.nan_to_none, + ak.nan_to_num, + ak.real, + ak.round, + ak.validity_error, +] + +# register as benchmarks +for fun in FUNS: + + @benchmark(_prepare_fun_benchmark(fun=fun)) + def _(state, **kwargs): + _general_fun_benchmark(state, **kwargs) + + +if __name__ == "__main__": + google_benchmark.main() diff --git a/benchmarks/reducer_benchmark.py b/benchmarks/reducer_benchmark.py new file mode 100644 index 0000000000..eabd534afd --- /dev/null +++ b/benchmarks/reducer_benchmark.py @@ -0,0 +1,68 @@ +import awkward as ak +import google_benchmark + +from util import benchmark, Jagged, Flat + + +def _prepare_reducer_benchmark(reducer): + return [ + { + "name": f"ak.{reducer.__name__}/array={mkarr.__name__}/{length=}/{dtype=}/{axis=}", + "mkarr": mkarr, + "length": length, + "dtype": dtype, + "reducer": reducer, + "axis": axis, + } + for mkarr in (Jagged, Flat) + for length in [1 << i for i in (12, 16, 20)] + for dtype in ["float64"] + for axis in ([None, 0, 1] if mkarr is Jagged else [None]) + ] + + +def _general_reducer_benchmark(state, **kwargs): + mkarr = kwargs["mkarr"] + length = kwargs["length"] + dtype = kwargs["dtype"] + reducer = kwargs["reducer"] + axis = kwargs["axis"] + + # create singely jagged awkward array + ak_array = mkarr(length, dtype) + + # run measurement + while state: + reducer(ak_array, axis=axis) + + # track how many elements per second are processed + state.counters["elements_per_second"] = google_benchmark.Counter( + length * state.iterations, google_benchmark.Counter.kIsRate + ) + + +# extend for more reducers +REDUCERS = [ + ak.all, + ak.any, + ak.argmax, + ak.argmin, + ak.max, + ak.mean, + ak.min, + ak.prod, + ak.std, + ak.sum, + ak.var, +] + +# register as benchmarks +for reducer in REDUCERS: + + @benchmark(_prepare_reducer_benchmark(reducer=reducer)) + def _(state, **kwargs): + _general_reducer_benchmark(state, **kwargs) + + +if __name__ == "__main__": + 
google_benchmark.main() diff --git a/benchmarks/requirements-benchmark.txt b/benchmarks/requirements-benchmark.txt new file mode 100644 index 0000000000..1c47cea3a0 --- /dev/null +++ b/benchmarks/requirements-benchmark.txt @@ -0,0 +1 @@ +google-benchmark diff --git a/benchmarks/run_action.sh b/benchmarks/run_action.sh new file mode 100755 index 0000000000..07340addbc --- /dev/null +++ b/benchmarks/run_action.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash + +action() { + # This is for the HEAD@PR (including main merged) + + # setup output dir + local current_git_hash=$(git rev-parse --verify HEAD) + local results_dir=${BASE_OUTPUT_DIR}/${BRANCH_NAME}__${current_git_hash} + local output_path_feature=${results_dir}/${bm_script}.json + + # Temporarily merge the target branch + git checkout -b pr_branch + git fetch --unshallow || echo "" # It might be worth switching actions/checkout to use depth 0 later on + git config user.email "gha@example.com" && git config user.name "GHA" # For some reason this is needed even though nothing is being committed + git merge --no-commit --no-ff origin/${TARGET_BRANCH} || (echo "***\nError: There are merge conflicts that need to be resolved.\n***" && false) + + # create + mkdir -p $results_dir + + local bm_script="benchmark.py" + + python $bm_script \ + --benchmark_time_unit=ms \ + --benchmark_out=${output_path_feature} \ + --benchmark_out_format=json + + + # This is for HEAD@main (usually main, not necessarily though) + git stash + git checkout origin/${TARGET_BRANCH} + + local current_git_hash=$(git rev-parse --verify HEAD) + local results_dir=${BASE_OUTPUT_DIR}/${TARGET_BRANCH}__${current_git_hash} + local output_path_target=${results_dir}/${bm_script}.json + + # create + mkdir -p $results_dir + + local bm_script="benchmark.py" + + python $bm_script \ + --benchmark_time_unit=ms \ + --benchmark_out=${output_path_target} \ + --benchmark_out_format=json + + # Compare both + python compare.py ${output_path_target} ${output_path_feature} +} 
+action "$@" diff --git a/benchmarks/run_local.sh b/benchmarks/run_local.sh new file mode 100755 index 0000000000..f797094f15 --- /dev/null +++ b/benchmarks/run_local.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +action() { + # setup output dir + local current_git_hash=$(git rev-parse --verify HEAD) + local results_dir=results/${current_git_hash} + + # create + mkdir -p $results_dir + + local bm_script="benchmark.py" + + python $bm_script \ + --benchmark_time_unit=ms \ + --benchmark_color=true \ + --benchmark_out=$results_dir/$bm_script.json \ + --benchmark_out_format=json +} +action "$@" diff --git a/benchmarks/util.py b/benchmarks/util.py new file mode 100644 index 0000000000..963b5e612d --- /dev/null +++ b/benchmarks/util.py @@ -0,0 +1,51 @@ +import functools +import math +import awkward as ak +import numpy as np +import google_benchmark + + +# reproducible rng +def rng(): + return np.random.default_rng(seed=42) + + +def benchmark(*args): + def decorator(func): + for test_case in args[0]: + + @google_benchmark.register(name=test_case["name"]) + @functools.wraps(func) + def wrapper(state, test_case=test_case): + return func(state, **test_case) + + return wrapper + + return decorator + + +def _generate_counts(sum_upto: int, how_many: int) -> np.ndarray: + counts = rng().multinomial( + sum_upto, rng().dirichlet(np.ones(how_many) * 0.3) + ).astype("int") + assert np.sum(counts) == sum_upto + return counts + + +def Jagged(length, dtype): + """creates a singely jagged array""" + flat_content = rng().random(length, dtype) + + # seems like a reasonable heuristic + powof2 = int(math.log(length) / math.log(2)) + how_many = (1 << (powof2 // 2)) * 10 + + assert how_many < length + + counts = _generate_counts(length, how_many) + return ak.unflatten(flat_content, counts) + + +def Flat(length, dtype): + """creates a flat array""" + return ak.Array(rng().random(length, dtype)) From 0c6e14f4ce1c29b783407ee76c856909dc95618f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" 
<66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 16 Jun 2025 17:34:01 +0000 Subject: [PATCH 3/9] style: pre-commit fixes --- benchmarks/benchmark.py | 5 +++-- benchmarks/compare.py | 14 +++++++++----- benchmarks/misc_benchmark.py | 6 ++++-- benchmarks/reducer_benchmark.py | 6 ++++-- benchmarks/util.py | 16 +++++++++++----- 5 files changed, 31 insertions(+), 16 deletions(-) diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py index 586932f284..efaba33490 100644 --- a/benchmarks/benchmark.py +++ b/benchmarks/benchmark.py @@ -1,9 +1,10 @@ +from __future__ import annotations + import google_benchmark +import misc_benchmark # noqa # explicit imports to register all benchmarks import reducer_benchmark # noqa -import misc_benchmark # noqa - if __name__ == "__main__": google_benchmark.main() diff --git a/benchmarks/compare.py b/benchmarks/compare.py index ffa588775e..38532b87b4 100644 --- a/benchmarks/compare.py +++ b/benchmarks/compare.py @@ -1,11 +1,13 @@ +from __future__ import annotations + import json -import sys import os import pathlib +import sys def load_json(filepath): - with open(filepath, "r") as f: + with open(filepath) as f: return json.load(f) @@ -32,7 +34,7 @@ def format_benchmark_name(name: str) -> str: pretty_name = f"{base}({array}<{length},{dtype_short}>" # any extra parameters to the function, e.g. 
`axis=0` - for k,v in params.items(): + for k, v in params.items(): pretty_name += f", {k}={v}" pretty_name += ")" @@ -96,7 +98,7 @@ def compare_benchmarks( f"| `cpu_time` ({time_unit}) | {cpu1:.6e} | {cpu2:.6e} |", f"| `real_time` ({time_unit}) | {b1['real_time']:.6e} | {b2['real_time']:.6e} |", ] - eps = "elements_per_second" # potential extra info + eps = "elements_per_second" # potential extra info if eps in b1 and eps in b2: table += [f"| `elements/s` (Hz) | {b1[eps]:.2e} | {b2[eps]:.2e} |"] @@ -104,7 +106,9 @@ def compare_benchmarks( output_lines.append("\n\n") if not found_diffs: - print(f"No significant differences (over {threshold * 100:.1f}%) in cpu_time found.") + print( + f"No significant differences (over {threshold * 100:.1f}%) in cpu_time found." + ) return header = ["## Benchmarks"] diff --git a/benchmarks/misc_benchmark.py b/benchmarks/misc_benchmark.py index e038ea32ee..07c75d54c4 100644 --- a/benchmarks/misc_benchmark.py +++ b/benchmarks/misc_benchmark.py @@ -1,7 +1,9 @@ -import awkward as ak +from __future__ import annotations + import google_benchmark +from util import Flat, Jagged, benchmark -from util import benchmark, Jagged, Flat +import awkward as ak def _prepare_fun_benchmark(fun): diff --git a/benchmarks/reducer_benchmark.py b/benchmarks/reducer_benchmark.py index eabd534afd..86b20dbc9e 100644 --- a/benchmarks/reducer_benchmark.py +++ b/benchmarks/reducer_benchmark.py @@ -1,7 +1,9 @@ -import awkward as ak +from __future__ import annotations + import google_benchmark +from util import Flat, Jagged, benchmark -from util import benchmark, Jagged, Flat +import awkward as ak def _prepare_reducer_benchmark(reducer): diff --git a/benchmarks/util.py b/benchmarks/util.py index 963b5e612d..fb1cfb2f94 100644 --- a/benchmarks/util.py +++ b/benchmarks/util.py @@ -1,8 +1,12 @@ +from __future__ import annotations + import functools import math -import awkward as ak -import numpy as np + import google_benchmark +import numpy as np + +import awkward as 
ak # reproducible rng @@ -25,9 +29,11 @@ def wrapper(state, test_case=test_case): def _generate_counts(sum_upto: int, how_many: int) -> np.ndarray: - counts = rng().multinomial( - sum_upto, rng().dirichlet(np.ones(how_many) * 0.3) - ).astype("int") + counts = ( + rng() + .multinomial(sum_upto, rng().dirichlet(np.ones(how_many) * 0.3)) + .astype("int") + ) assert np.sum(counts) == sum_upto return counts From fdc73ae21c710589bb2265288adfd7e464871d9f Mon Sep 17 00:00:00 2001 From: pfackeldey Date: Mon, 16 Jun 2025 13:48:48 -0400 Subject: [PATCH 4/9] satisfy precommit --- benchmarks/benchmark.py | 4 ++-- benchmarks/compare.py | 12 ++++++------ benchmarks/run_action.sh | 14 +++++++------- benchmarks/run_local.sh | 4 ++-- benchmarks/util.py | 2 +- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py index efaba33490..943e37b5bf 100644 --- a/benchmarks/benchmark.py +++ b/benchmarks/benchmark.py @@ -1,10 +1,10 @@ from __future__ import annotations import google_benchmark -import misc_benchmark # noqa # explicit imports to register all benchmarks -import reducer_benchmark # noqa +import misc_benchmark # noqa: F401 +import reducer_benchmark # noqa: F401 if __name__ == "__main__": google_benchmark.main() diff --git a/benchmarks/compare.py b/benchmarks/compare.py index 38532b87b4..0998bf042a 100644 --- a/benchmarks/compare.py +++ b/benchmarks/compare.py @@ -19,7 +19,7 @@ def format_benchmark_name(name: str) -> str: try: parts = name.split("/") base = parts[0] - params = {k: v for k, v in (part.split("=", 1) for part in parts[1:])} + params = dict(part.split("=", 1) for part in parts[1:]) array = params.pop("array", "??") length = params.pop("length", "??") @@ -62,7 +62,7 @@ def compare_benchmarks( found_diffs = False - for name in bm1: + for name in bm1: # noqa: PLC0206 if name in bm2: b1 = bm1[name] b2 = bm2[name] @@ -106,7 +106,7 @@ def compare_benchmarks( output_lines.append("\n\n") if not found_diffs: - print( + 
print( # noqa: T201 f"No significant differences (over {threshold * 100:.1f}%) in cpu_time found." ) return @@ -115,18 +115,18 @@ def compare_benchmarks( markdown_output = "\n".join(header + output_lines) # Print to terminal - print(markdown_output) + print(markdown_output) # noqa: T201 # Save to file with open(output_file, "w") as f: f.write(markdown_output + "\n") - print(f"\n✅ Detailed Markdown saved to `{output_file}`") + print(f"\n✅ Detailed Markdown saved to `{output_file}`") # noqa: T201 if __name__ == "__main__": if len(sys.argv) != 3: - print(f"Usage: python {os.path.basename(__file__)} file1.json file2.json") + print(f"Usage: python {os.path.basename(__file__)} file1.json file2.json") # noqa: T201 sys.exit(1) file1 = sys.argv[1] diff --git a/benchmarks/run_action.sh b/benchmarks/run_action.sh index 07340addbc..c387541c49 100755 --- a/benchmarks/run_action.sh +++ b/benchmarks/run_action.sh @@ -12,38 +12,38 @@ action() { git checkout -b pr_branch git fetch --unshallow || echo "" # It might be worth switching actions/checkout to use depth 0 later on git config user.email "gha@example.com" && git config user.name "GHA" # For some reason this is needed even though nothing is being committed - git merge --no-commit --no-ff origin/${TARGET_BRANCH} || (echo "***\nError: There are merge conflicts that need to be resolved.\n***" && false) + git merge --no-commit --no-ff origin/"${TARGET_BRANCH}" || (echo "***\nError: There are merge conflicts that need to be resolved.\n***" && false) # create - mkdir -p $results_dir + mkdir -p "$results_dir" local bm_script="benchmark.py" python $bm_script \ --benchmark_time_unit=ms \ - --benchmark_out=${output_path_feature} \ + --benchmark_out="${output_path_feature}" \ --benchmark_out_format=json # This is for HEAD@main (usually main, not necessarily though) git stash - git checkout origin/${TARGET_BRANCH} + git checkout origin/"${TARGET_BRANCH}" local current_git_hash=$(git rev-parse --verify HEAD) local
results_dir=${BASE_OUTPUT_DIR}/${TARGET_BRANCH}__${current_git_hash} local output_path_target=${results_dir}/${bm_script}.json # create - mkdir -p $results_dir + mkdir -p "$results_dir" local bm_script="benchmark.py" python $bm_script \ --benchmark_time_unit=ms \ - --benchmark_out=${output_path_target} \ + --benchmark_out="${output_path_target}" \ --benchmark_out_format=json # Compare both - python compare.py ${output_path_target} ${output_path_feature} + python compare.py "${output_path_target}" "${output_path_feature}" } action "$@" diff --git a/benchmarks/run_local.sh b/benchmarks/run_local.sh index f797094f15..cd4873b185 100755 --- a/benchmarks/run_local.sh +++ b/benchmarks/run_local.sh @@ -6,14 +6,14 @@ action() { local results_dir=results/${current_git_hash} # create - mkdir -p $results_dir + mkdir -p "$results_dir" local bm_script="benchmark.py" python $bm_script \ --benchmark_time_unit=ms \ --benchmark_color=true \ - --benchmark_out=$results_dir/$bm_script.json \ + --benchmark_out="$results_dir"/$bm_script.json \ --benchmark_out_format=json } action "$@" diff --git a/benchmarks/util.py b/benchmarks/util.py index fb1cfb2f94..fbd85f7f65 100644 --- a/benchmarks/util.py +++ b/benchmarks/util.py @@ -4,7 +4,7 @@ import math import google_benchmark -import numpy as np +import numpy as np # noqa: TID251 import awkward as ak From 9e7e05ef2d917ba3dc2f71e2273bbd11a0878619 Mon Sep 17 00:00:00 2001 From: pfackeldey Date: Mon, 16 Jun 2025 13:51:23 -0400 Subject: [PATCH 5/9] try this syntax --- .github/workflows/benchmark.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index d7e8f65c78..3ce3ace34c 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -109,7 +109,7 @@ jobs: - name: Comment on PR uses: actions/github-script@v7 - if: ${{ hashFiles(format('{0}/comparison.md', ${{ steps.benchmark_and_compare.env.BASE_OUTPUT_DIR }})) != '' }} # if there's no 
comparison.md, we won't post anything + if: ${{ hashFiles(format('{0}/comparison.md', steps.benchmark_and_compare.env.BASE_OUTPUT_DIR)) != '' }} # if there's no comparison.md, we won't post anything with: script: | github.rest.issues.createComment({ From e8dff90584d207f3cc9010121ae3baffa5cd9847 Mon Sep 17 00:00:00 2001 From: pfackeldey Date: Mon, 16 Jun 2025 14:52:26 -0400 Subject: [PATCH 6/9] satisfy shellcheck --- benchmarks/run_action.sh | 7 +++++-- benchmarks/run_local.sh | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/benchmarks/run_action.sh b/benchmarks/run_action.sh index c387541c49..807526b46a 100755 --- a/benchmarks/run_action.sh +++ b/benchmarks/run_action.sh @@ -4,7 +4,8 @@ action() { # This is for the HEAD@PR (including main merged) # setup output dir - local current_git_hash=$(git rev-parse --verify HEAD) + local current_git_hash + current_git_hash=$(git rev-parse --verify HEAD) local results_dir=${BASE_OUTPUT_DIR}/${BRANCH_NAME}__${current_git_hash} local output_path_feature=${results_dir}/${bm_script}.json @@ -12,6 +13,7 @@ action() { git checkout -b pr_branch git fetch --unshallow || echo "" # It might be worth switching actions/checkout to use depth 0 later on git config user.email "gha@example.com" && git config user.name "GHA" # For some reason this is needed even though nothing is being committed + # shellcheck disable=SC2028 git merge --no-commit --no-ff origin/"${TARGET_BRANCH}" || (echo "***\nError: There are merge conflicts that need to be resolved.\n***" && false) # create @@ -29,7 +31,8 @@ action() { git stash git checkout origin/"${TARGET_BRANCH}" - local current_git_hash=$(git rev-parse --verify HEAD) + local current_git_hash + current_git_hash=$(git rev-parse --verify HEAD) local results_dir=${BASE_OUTPUT_DIR}/${TARGET_BRANCH}__${current_git_hash} local output_path_target=${results_dir}/${bm_script}.json diff --git a/benchmarks/run_local.sh b/benchmarks/run_local.sh index cd4873b185..19a3370169 100755 --- 
a/benchmarks/run_local.sh +++ b/benchmarks/run_local.sh @@ -2,7 +2,8 @@ action() { # setup output dir - local current_git_hash=$(git rev-parse --verify HEAD) + local current_git_hash + current_git_hash=$(git rev-parse --verify HEAD) local results_dir=results/${current_git_hash} # create From 1f8eae088ec5387828a7844dd5225a1c91c34087 Mon Sep 17 00:00:00 2001 From: pfackeldey Date: Mon, 16 Jun 2025 14:55:53 -0400 Subject: [PATCH 7/9] try fix getting target branch name --- .github/workflows/benchmark.yml | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 3ce3ace34c..1f8daaddc2 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -81,28 +81,15 @@ jobs: - name: Print versions run: python -m pip list - - name: Get PR target branch - id: get_target_branch - uses: actions/github-script@v7 - with: - result-encoding: string - script: | - const { data: pullRequest } = await github.rest.pulls.get({ - owner: context.repo.owner, - repo: "awkward", - pull_number: "${{ inputs.pr-number }}", - }); - return pullRequest.base.ref; - - name: Run Benchmark and Comparisons id: benchmark_and_compare shell: bash run: | cd benchmarks/ ./run_action.sh - echo "comparison='$(cat BASE_OUTPUT_DIR)/comparison.md'" >> $GITHUB_OUTPUT + echo "comparison='$(cat $BASE_OUTPUT_DIR)/comparison.md'" >> $GITHUB_OUTPUT env: - TARGET_BRANCH: ${{ steps.get_target_branch.outputs.result }} # usually: main + TARGET_BRANCH: ${{ github.event.pull_request.base.ref }} # usually: main BRANCH_NAME: ${{ github.head_ref || github.ref_name }} # feature branch BASE_OUTPUT_DIR: results_PR${{ inputs.pr-number }} continue-on-error: true # failed benchmarking shouldn't stop the rest of the steps From 3e2bf2d5949cba98378a7056f6800942b5210789 Mon Sep 17 00:00:00 2001 From: Peter Fackeldey Date: Tue, 17 Jun 2025 11:40:00 -0400 Subject: [PATCH 8/9] fix: debug benchmarks (#3550) * debug test 1 * 
fix output piping for benchmark comparisons * try handling multiline outputs * hopefully fix paths * another try to pass multiline output * try fix json file names * fix directory creation for benchmark results * please precommit * prettify table headers * style: pre-commit fixes * go back to original commit for comparison * prettify branch name and SHA in table header * style: pre-commit fixes * try self-hosted runner * another try with self-hosted runner * another try --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .github/workflows/benchmark.yml | 69 ++++++++++++++++++++------------- benchmarks/compare.py | 18 +++++++-- benchmarks/misc_benchmark.py | 1 - benchmarks/run_action.sh | 30 +++++++------- 4 files changed, 71 insertions(+), 47 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 1f8daaddc2..0051faffc6 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -20,44 +20,48 @@ concurrency: cancel-in-progress: true jobs: - run-tests: + run-benchmarks: name: Run Benchmarks - strategy: - fail-fast: false - matrix: - runs-on: - - ubuntu-latest - python-version: - - "3.13" - python-architecture: - - x64 - - runs-on: ${{ matrix.runs-on }} + + runs-on: self-hosted env: PIP_ONLY_BINARY: numpy + # Required for miniconda to activate conda + defaults: + run: + shell: bash -l {0} + steps: + - name: Clean the workspace and mamba + run: | + rm -rf * .[!.]* || echo "Nothing to clean" + rm -rf ~/micromamba* || echo "Nothing to clean" + - uses: actions/checkout@v4 with: submodules: true - - name: "Python ${{ matrix.python-version }}" - uses: actions/setup-python@v5 + - name: Get micromamba + uses: mamba-org/setup-micromamba@v2 with: - python-version: "${{ matrix.python-version }}" - architecture: "${{ matrix.python-architecture }}" - allow-prereleases: true + environment-name: test-env + init-shell: bash + create-args: >- + python=3.13 - name: 
Generate build files - run: pipx run nox -s prepare -- --headers --signatures --tests + run: | + pip install pipx + pipx run nox -s prepare -- --headers --signatures --tests - name: Cache awkward-cpp wheel id: cache-awkward-cpp-wheel uses: actions/cache@v4 with: path: awkward-cpp/dist - key: ${{ github.job }}-${{ matrix.runs-on }}-${{ matrix.python-version }}-${{ matrix.python-architecture }}-${{ hashFiles('awkward-cpp/**') }} + key: ${{ github.job }}-${{ hashFiles('awkward-cpp/**') }} - name: Build awkward-cpp wheel if: steps.cache-awkward-cpp-wheel.outputs.cache-hit != 'true' @@ -72,6 +76,10 @@ jobs: files: | awkward-cpp/dist/*.whl + - name: Add workaround for 3.13 + cramjam + run: echo 'PYO3_USE_ABI3_FORWARD_COMPATIBILITY=1' >> $GITHUB_ENV + shell: bash + - name: Install awkward, awkward-cpp, and dependencies run: python -m pip install -v . ${{ steps.find-wheel.outputs.paths }} pytest-github-actions-annotate-failures @@ -83,28 +91,37 @@ jobs: - name: Run Benchmark and Comparisons id: benchmark_and_compare - shell: bash run: | cd benchmarks/ ./run_action.sh - echo "comparison='$(cat $BASE_OUTPUT_DIR)/comparison.md'" >> $GITHUB_OUTPUT + COMPARISON=$(cat $(echo $BASE_OUTPUT_DIR)/comparison.md) + { + echo "comparison<<EOF" + echo "$COMPARISON" + echo "EOF" + } >> $GITHUB_OUTPUT + cd ..
env: TARGET_BRANCH: ${{ github.event.pull_request.base.ref }} # usually: main BRANCH_NAME: ${{ github.head_ref || github.ref_name }} # feature branch - BASE_OUTPUT_DIR: results_PR${{ inputs.pr-number }} - continue-on-error: true # failed benchmarking shouldn't stop the rest of the steps + BASE_OUTPUT_DIR: results + continue-on-error: true # failed benchmarking shouldn't stop the rest of the steps - name: Comment on PR uses: actions/github-script@v7 - if: ${{ hashFiles(format('{0}/comparison.md', steps.benchmark_and_compare.env.BASE_OUTPUT_DIR)) != '' }} # if there's no comparison.md, we won't post anything + if: ${{ hashFiles('benchmarks/results/comparison.md') != '' }} # if there's no comparison.md, we won't post anything with: script: | github.rest.issues.createComment({ issue_number: context.issue.number, owner: context.repo.owner, repo: context.repo.repo, - body: `${{ steps.benchmark_and_compare.outputs.comparison }}` + body: process.env.COMPARISON }) + env: + COMPARISON: ${{ steps.benchmark_and_compare.outputs.comparison }} - name: Cleanup Benchmark Outputs - run: rm -r ${{ steps.benchmark_and_compare.env.BASE_OUTPUT_DIR }} + run: rm -r benchmarks/$BASE_OUTPUT_DIR + env: + BASE_OUTPUT_DIR: results diff --git a/benchmarks/compare.py b/benchmarks/compare.py index 0998bf042a..66c29240ab 100644 --- a/benchmarks/compare.py +++ b/benchmarks/compare.py @@ -55,9 +55,6 @@ def compare_benchmarks( bm1 = {b["name"]: b for b in data1["benchmarks"]} bm2 = {b["name"]: b for b in data2["benchmarks"]} - file1_short = os.path.basename(file1_path) - file2_short = os.path.basename(file2_path) - output_lines = [] found_diffs = False @@ -86,7 +83,20 @@ def compare_benchmarks( "
Show full comparison\n\n" ) - headers = ["Metric", f"{file1_short}", f"{file2_short}"] + def _parse_branch_sha(file_path): + assert file_path.endswith(".json") + file_path = file_path.replace(".json", "") # remove file ending + file_path = file_path.replace( + os.getenv("BASE_OUTPUT_DIR", "results") + "/", "", 1 + ) # remove base dir + branch, sha = file_path.rsplit("__", 1) # get branch & SHA + return f"branch: `{branch}` (sha: {sha})" + + headers = [ + "Metric", + _parse_branch_sha(file1_path), + _parse_branch_sha(file2_path), + ] time_unit = b1.get("time_unit", "") assert time_unit == b2.get("time_unit", ""), ( "Can't compare difference units" diff --git a/benchmarks/misc_benchmark.py b/benchmarks/misc_benchmark.py index 07c75d54c4..0419ec9126 100644 --- a/benchmarks/misc_benchmark.py +++ b/benchmarks/misc_benchmark.py @@ -41,7 +41,6 @@ def _general_fun_benchmark(state, **kwargs): # extend for more misc funs -# some of them don't make much sense to benchmark I suppose... FUNS = [ ak.angle, ak.drop_none, diff --git a/benchmarks/run_action.sh b/benchmarks/run_action.sh index 807526b46a..7131f2e8ed 100755 --- a/benchmarks/run_action.sh +++ b/benchmarks/run_action.sh @@ -4,10 +4,9 @@ action() { # This is for the HEAD@PR (including main merged) # setup output dir - local current_git_hash - current_git_hash=$(git rev-parse --verify HEAD) - local results_dir=${BASE_OUTPUT_DIR}/${BRANCH_NAME}__${current_git_hash} - local output_path_feature=${results_dir}/${bm_script}.json + local orig_git_hash + orig_git_hash=$(git rev-parse --verify HEAD) + local output_path_feature=${BASE_OUTPUT_DIR}/${BRANCH_NAME}__${orig_git_hash}.json # Temporarily merge the target branch git checkout -b pr_branch @@ -17,11 +16,9 @@ action() { git merge --no-commit --no-ff origin/"${TARGET_BRANCH}" || (echo "***\nError: There are merge conflicts that need to be resolved.\n***" && false) # create - mkdir -p "$results_dir" + mkdir -p "$(dirname "${output_path_feature}")" - local 
bm_script="benchmark.py" - - python $bm_script \ + python benchmark.py \ --benchmark_time_unit=ms \ --benchmark_out="${output_path_feature}" \ --benchmark_out_format=json @@ -31,22 +28,23 @@ action() { git stash git checkout origin/"${TARGET_BRANCH}" - local current_git_hash - current_git_hash=$(git rev-parse --verify HEAD) - local results_dir=${BASE_OUTPUT_DIR}/${TARGET_BRANCH}__${current_git_hash} - local output_path_target=${results_dir}/${bm_script}.json + local target_git_hash + target_git_hash=$(git rev-parse --verify HEAD) + local output_path_target=${BASE_OUTPUT_DIR}/${TARGET_BRANCH}__${target_git_hash}.json # create - mkdir -p "$results_dir" - - local bm_script="benchmark.py" + mkdir -p "$(dirname "${output_path_target}")" - python $bm_script \ + python benchmark.py \ --benchmark_time_unit=ms \ --benchmark_out="${output_path_target}" \ --benchmark_out_format=json # Compare both + # first: switch back to original commit + git stash + git checkout "${orig_git_hash}" + python compare.py "${output_path_target}" "${output_path_feature}" } action "$@" From 6d7517dcc0e0ef8efba2b75cae4a2fbab460fd3d Mon Sep 17 00:00:00 2001 From: pfackeldey Date: Tue, 17 Jun 2025 11:48:39 -0400 Subject: [PATCH 9/9] prettify python side --- benchmarks/compare.py | 34 ++++----------------------------- benchmarks/misc_benchmark.py | 17 ++++++++++++++--- benchmarks/reducer_benchmark.py | 14 +++++++++++--- benchmarks/util.py | 21 ++++++++++++++++++++ 4 files changed, 50 insertions(+), 36 deletions(-) diff --git a/benchmarks/compare.py b/benchmarks/compare.py index 66c29240ab..22034f6e69 100644 --- a/benchmarks/compare.py +++ b/benchmarks/compare.py @@ -15,34 +15,6 @@ def relative_difference(val1, val2): return abs(val1 - val2) / min(val1, val2) -def format_benchmark_name(name: str) -> str: - try: - parts = name.split("/") - base = parts[0] - params = dict(part.split("=", 1) for part in parts[1:]) - - array = params.pop("array", "??") - length = params.pop("length", "??") - dtype = 
params.pop("dtype", "").replace("'", "") - dtype_short = { - "float64": "f64", - "float32": "f32", - "int64": "i64", - "int32": "i32", - }.get(dtype, dtype) - - pretty_name = f"{base}({array}<{length},{dtype_short}>" - - # any extra parameters to the function, e.g. `axis=0` - for k, v in params.items(): - pretty_name += f", {k}={v}" - - pretty_name += ")" - return pretty_name - except Exception: - return name # fallback - - def compare_benchmarks( file1_path, file2_path, @@ -70,7 +42,7 @@ def compare_benchmarks( if rel_diff > threshold: found_diffs = True - display_name = format_benchmark_name(name) + display_name = name direction = "🟢 **Improvement**" if cpu2 < cpu1 else "🔴 **Regression**" diff_line = f"**Relative CPU Time Difference:** `{rel_diff * 100:.1f}%` — {direction}" @@ -146,4 +118,6 @@ def _parse_branch_sha(file_path): pathlib.Path(os.getenv("BASE_OUTPUT_DIR", "results")) / pathlib.Path("comparison.md") ) - compare_benchmarks(file1, file2, output_file) + compare_benchmarks( + file1, file2, output_file, threshold=0.1 + ) # increase threshold if it's too noisy diff --git a/benchmarks/misc_benchmark.py b/benchmarks/misc_benchmark.py index 0419ec9126..89b881ae93 100644 --- a/benchmarks/misc_benchmark.py +++ b/benchmarks/misc_benchmark.py @@ -1,7 +1,7 @@ from __future__ import annotations import google_benchmark -from util import Flat, Jagged, benchmark +from util import Jagged, benchmark, format_benchmark_name import awkward as ak @@ -9,13 +9,20 @@ def _prepare_fun_benchmark(fun): return [ { - "name": f"ak.{fun.__name__}/array={mkarr.__name__}/{length=}/{dtype=}", + "name": format_benchmark_name( + { + "op_name": fun.__name__, + "array": mkarr.__name__, + "length": length, + "dtype": dtype, + } + ), "mkarr": mkarr, "length": length, "dtype": dtype, "fun": fun, } - for mkarr in (Jagged, Flat) + for mkarr in [Jagged] for length in [1 << i for i in (12, 16, 20)] for dtype in ["float64"] ] @@ -30,6 +37,10 @@ def _general_fun_benchmark(state, **kwargs): #
create singely jagged awkward array ak_array = mkarr(length, dtype) + # for ak.imag/real we need to add imaginary component + if fun in (ak.imag, ak.real): + ak_array = ak_array + 1j * ak_array + # run measurement while state: fun(ak_array) diff --git a/benchmarks/reducer_benchmark.py b/benchmarks/reducer_benchmark.py index 86b20dbc9e..779113e4b7 100644 --- a/benchmarks/reducer_benchmark.py +++ b/benchmarks/reducer_benchmark.py @@ -1,7 +1,7 @@ from __future__ import annotations import google_benchmark -from util import Flat, Jagged, benchmark +from util import Jagged, benchmark, format_benchmark_name import awkward as ak @@ -9,14 +9,22 @@ def _prepare_reducer_benchmark(reducer): return [ { - "name": f"ak.{reducer.__name__}/array={mkarr.__name__}/{length=}/{dtype=}/{axis=}", + "name": format_benchmark_name( + { + "op_name": reducer.__name__, + "array": mkarr.__name__, + "length": length, + "dtype": dtype, + "axis": axis, + } + ), "mkarr": mkarr, "length": length, "dtype": dtype, "reducer": reducer, "axis": axis, } - for mkarr in (Jagged, Flat) + for mkarr in [Jagged] for length in [1 << i for i in (12, 16, 20)] for dtype in ["float64"] for axis in ([None, 0, 1] if mkarr is Jagged else [None]) diff --git a/benchmarks/util.py b/benchmarks/util.py index fbd85f7f65..b0ee6a096d 100644 --- a/benchmarks/util.py +++ b/benchmarks/util.py @@ -55,3 +55,24 @@ def Jagged(length, dtype): def Flat(length, dtype): """creates a flat array""" return ak.Array(rng().random(length, dtype)) + + +def format_benchmark_name(params: dict) -> str: + base = "ak." + params.pop("op_name", "??") + array = params.pop("array", "??") + length = params.pop("length", "??") + dtype_short = ( + params.pop("dtype", "??") + .replace("float", "f") + .replace("int", "i") + .replace("complex", "c") + ) + + pretty_name = f"{base}({array}<{dtype_short}[{length}]>" + + # any extra parameters to the function, e.g. 
`axis=0` + for k, v in params.items(): + pretty_name += f", {k}={v}" + + pretty_name += ")" + return pretty_name