scikit-hep · pfackeldey · Jun 16, 2025 · Jun 16, 2025 · Jun 16, 2025 · Jun 16, 2025
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
@@ -0,0 +1,127 @@
+# Benchmark workflow: triggered by pull requests and by manual dispatch.
+name: Benchmarks
+
+on:
+  pull_request:
+    # Pull requests that only touch the paths below do not trigger a run.
+    paths-ignore:
+      - README.md
+      - CONTRIBUTING.md
+      - CITATION.cff
+      - LICENSE
+      - .readthedocs.yml
+      - docs-img/**
+      - docs/**
+      - awkward-cpp/docs/**
+      - studies/**
+
+  workflow_dispatch:
+
+# One concurrency group per PR branch (head_ref is only set for pull_request
+# events; other events fall back to the unique run_id). A newer run in the
+# same group cancels the one still in progress.
+concurrency:
+  group: "benchmark-${{ github.head_ref || github.run_id }}"
+  cancel-in-progress: true
+
+jobs:
+  run-benchmarks:
+    name: Run Benchmarks
+
+    # NOTE(review): presumably a dedicated machine so that timings are
+    # comparable between runs - confirm.
+    runs-on: self-hosted
+
+    env:
+      # pip reads PIP_ONLY_BINARY as its --only-binary option: numpy is only
+      # ever installed from a wheel, never built from source.
+      PIP_ONLY_BINARY: numpy
+
+    # Required for miniconda to activate conda
+    # (every `run` step uses a login shell unless it sets its own `shell`;
+    # presumably this is what sources the setup written by `init-shell: bash`
+    # in the micromamba step - confirm).
+    defaults:
+      run:
+        shell: bash -l {0}
+
+    steps:
+      # Start from a clean slate: remove everything in the workspace (dotfiles
+      # included) and any ~/micromamba* leftovers. The `|| echo` fallbacks
+      # replace a failing `rm` status with a message.
+      # NOTE(review): presumably needed because the self-hosted runner keeps
+      # state between runs - confirm.
+      - name: Clean the workspace and mamba
+        run: |
+          rm -rf * .[!.]* || echo "Nothing to clean"
+          rm -rf ~/micromamba* || echo "Nothing to clean"
+
+      # Check out the repository together with its git submodules.
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+
+      # Create the micromamba environment `test-env` with Python 3.13 and
+      # initialise bash for it.
+      - name: Get micromamba
+        uses: mamba-org/setup-micromamba@v2
+        with:
+          environment-name: test-env
+          init-shell: bash
+          create-args: >-
+            python=3.13
+
+      # Run the nox session `prepare` (through pipx) with the headers,
+      # signatures and tests options.
+      - name: Generate build files
+        run: |
+          pip install pipx
+          pipx run nox -s prepare -- --headers --signatures --tests
+
+      # Cache awkward-cpp/dist, keyed on the job id and a hash of everything
+      # under awkward-cpp/.
+      - name: Cache awkward-cpp wheel
+        id: cache-awkward-cpp-wheel
+        uses: actions/cache@v4
+        with:
+          path: awkward-cpp/dist
+          key: ${{ github.job }}-${{ hashFiles('awkward-cpp/**') }}
+
+      # Only build the wheel when the cache step above did not restore one.
+      - name: Build awkward-cpp wheel
+        if: steps.cache-awkward-cpp-wheel.outputs.cache-hit != 'true'
+        run: |
+          python -m pip install build
+          python -m build -w awkward-cpp
+
+      # Resolve the wheel path(s) in awkward-cpp/dist; the install step below
+      # consumes them through `steps.find-wheel.outputs.paths`.
+      - name: Find built wheel
+        uses: tj-actions/glob@v22
+        id: find-wheel
+        with:
+          files: |
+            awkward-cpp/dist/*.whl
+
+      # Appending to $GITHUB_ENV makes the variable visible to all later steps.
+      - name: Add workaround for 3.13 + cramjam
+        run: echo 'PYO3_USE_ABI3_FORWARD_COMPATIBILITY=1' >> $GITHUB_ENV
+        shell: bash
+
+      # Install the checked-out package (`.`), the wheel(s) found above and the
+      # pytest annotation plugin in a single pip invocation.
+      - name: Install awkward, awkward-cpp, and dependencies
+        run: python -m pip install -v . ${{ steps.find-wheel.outputs.paths }} pytest-github-actions-annotate-failures
+
+      - name: Setup Benchmark Env
+        run: python -m pip install -r benchmarks/requirements-benchmark.txt
+
+      # Record the exact package versions of this run in the job log.
+      - name: Print versions
+        run: python -m pip list
+
+      # Run the benchmarks, then expose the generated Markdown report as the
+      # multi-line step output `comparison` (heredoc syntax in $GITHUB_OUTPUT).
+      - name: Run Benchmark and Comparisons
+        id: benchmark_and_compare
+        run: |
+          cd benchmarks/
+          ./run_action.sh
+          COMPARISON=$(cat "${BASE_OUTPUT_DIR}/comparison.md")
+          {
+            echo "comparison<<EOF"
+            echo "${COMPARISON}"
+            echo "EOF"
+          } >> "$GITHUB_OUTPUT"
+          cd ..
+        env:
+          TARGET_BRANCH: ${{ github.event.pull_request.base.ref }} # usually: main
+          BRANCH_NAME: ${{ github.head_ref || github.ref_name }} # feature branch
+          BASE_OUTPUT_DIR: results
+        continue-on-error: true # failed benchmarking shouldn't stop the rest of the steps
+
+      # Post the report as a PR comment. Skipped when
+      #  - the run was not triggered by a pull request (a manual dispatch has
+      #    no PR, so `context.issue.number` would be undefined), or
+      #  - there is no comparison.md, i.e. nothing to post.
+      - name: Comment on PR
+        uses: actions/github-script@v7
+        if: ${{ github.event_name == 'pull_request' && hashFiles('benchmarks/results/comparison.md') != '' }}
+        with:
+          # Await the request so that a rejected API call fails this step
+          # instead of being lost when the script returns.
+          script: |
+            await github.rest.issues.createComment({
+              issue_number: context.issue.number,
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              body: process.env.COMPARISON
+            })
+        env:
+          COMPARISON: ${{ steps.benchmark_and_compare.outputs.comparison }}
+
+      # Always tidy up, even after a failed step; `-f` keeps the step green
+      # when the results directory was never created.
+      - name: Cleanup Benchmark Outputs
+        if: ${{ always() }}
+        run: rm -rf "benchmarks/${BASE_OUTPUT_DIR}"
+        env:
+          BASE_OUTPUT_DIR: results
diff --git a/benchmarks/README.md b/benchmarks/README.md
@@ -0,0 +1,16 @@
+# Benchmarks
+
+Setup env:
+```shell
+pip install -r requirements-benchmark.txt
+```
+
+Run with:
+```shell
+./run_local.sh
+```
+
+If you have 2 benchmark results to compare:
+```shell
+python compare.py file1.json file2.json
+```
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py
@@ -0,0 +1,10 @@
+from __future__ import annotations
+
+import google_benchmark
+
+# explicit imports to register all benchmarks
+import misc_benchmark  # noqa: F401
+import reducer_benchmark  # noqa: F401
+
+# Script entry point: hand control to google_benchmark. The benchmark modules
+# imported above exist in this namespace only for their registration side
+# effect.
+if __name__ == "__main__":
+    google_benchmark.main()
diff --git a/benchmarks/compare.py b/benchmarks/compare.py
@@ -0,0 +1,123 @@
+from __future__ import annotations
+
+import json
+import os
+import pathlib
+import sys
+
+
+def load_json(filepath):
+    """Read ``filepath`` and return the decoded JSON document.
+
+    Args:
+        filepath: path of the JSON file to read.
+
+    Returns:
+        The Python object produced by ``json.load``.
+    """
+    # Decode explicitly as UTF-8 instead of relying on the locale's default
+    # encoding, which is not UTF-8 on every platform.
+    with open(filepath, encoding="utf-8") as f:
+        return json.load(f)
+
+
+def relative_difference(val1, val2):
+    """Return ``|val1 - val2|`` relative to the smaller of the two values.
+
+    Args:
+        val1: first measurement.
+        val2: second measurement.
+
+    Returns:
+        ``abs(val1 - val2) / min(val1, val2)``. Equal inputs give ``0.0`` and
+        a zero baseline with a non-zero counterpart gives ``inf``, so a
+        measurement of 0 no longer raises ``ZeroDivisionError``.
+    """
+    if val1 == val2:
+        return 0.0
+    smaller = min(val1, val2)
+    if smaller == 0:
+        # No finite ratio exists against a zero baseline.
+        return float("inf")
+    return abs(val1 - val2) / smaller
+
+
+def _describe_result_file(file_path):
+    """Build the table-header label for one benchmark result file.
+
+    A name of the form ``<BASE_OUTPUT_DIR>/<branch>__<sha>.json`` is rendered
+    as branch and SHA; a name without the ``__`` separator is shown as a
+    plain file label instead of raising.
+    """
+    label = str(file_path)
+    if label.endswith(".json"):
+        label = label[: -len(".json")]  # strip only the trailing extension
+    label = label.replace(
+        os.getenv("BASE_OUTPUT_DIR", "results") + "/", "", 1
+    )  # remove base dir
+    branch, separator, sha = label.rpartition("__")
+    if not separator:
+        return f"file: `{label}`"
+    return f"branch: `{branch}` (sha: {sha})"
+
+
+def compare_benchmarks(
+    file1_path,
+    file2_path,
+    output_path,
+    threshold=0.1,
+):
+    """Compare two benchmark result JSON files and write a Markdown report.
+
+    Benchmarks present in both files are matched by name. Every pair whose
+    ``cpu_time`` differs by more than ``threshold`` (relative to the smaller
+    value) gets its own section in the report.
+
+    Args:
+        file1_path: path of the first result file (the reference).
+        file2_path: path of the second result file; a lower ``cpu_time`` here
+            is reported as an improvement, a higher one as a regression.
+        output_path: where the Markdown report is written. Missing parent
+            directories are created.
+        threshold: minimum relative ``cpu_time`` difference worth reporting.
+
+    Returns:
+        None. When no difference exceeds ``threshold`` a message is printed
+        and no file is written.
+
+    Raises:
+        AssertionError: if a reported pair uses different ``time_unit`` values.
+    """
+    data1 = load_json(file1_path)
+    data2 = load_json(file2_path)
+
+    bm1 = {b["name"]: b for b in data1["benchmarks"]}
+    bm2 = {b["name"]: b for b in data2["benchmarks"]}
+
+    # The column headers depend only on the file names, so build them once.
+    headers = [
+        "Metric",
+        _describe_result_file(file1_path),
+        _describe_result_file(file2_path),
+    ]
+    eps = "elements_per_second"  # potential extra info
+
+    output_lines = []
+
+    for name, b1 in bm1.items():
+        if name not in bm2:
+            continue  # only benchmarks present in both files can be compared
+        b2 = bm2[name]
+
+        cpu1 = b1["cpu_time"]
+        cpu2 = b2["cpu_time"]
+        rel_diff = relative_difference(cpu1, cpu2)
+
+        # `not (... > ...)` rather than `<=` so that a NaN is skipped too.
+        if not rel_diff > threshold:
+            continue
+
+        direction = "🟢 **Improvement**" if cpu2 < cpu1 else "🔴 **Regression**"
+        diff_line = f"**Relative CPU Time Difference:** `{rel_diff * 100:.1f}%` — {direction}"
+
+        output_lines.append(f"### 🔹 {name}\n")
+        output_lines.append(diff_line + "\n")
+
+        # Collapsed detailed comparison
+        output_lines.append("<details><summary>Show full comparison</summary>\n\n")
+
+        time_unit = b1.get("time_unit", "")
+        assert time_unit == b2.get("time_unit", ""), (
+            "Can't compare difference units"
+        )
+
+        table = [
+            "| " + " | ".join(headers) + " |",
+            "| --- | --- | --- |",
+            f"| `cpu_time` ({time_unit}) | {cpu1:.6e} | {cpu2:.6e} |",
+            f"| `real_time` ({time_unit}) | {b1['real_time']:.6e} | {b2['real_time']:.6e} |",
+        ]
+        if eps in b1 and eps in b2:
+            table += [f"| `elements/s` (Hz) | {b1[eps]:.2e} | {b2[eps]:.2e} |"]
+
+        output_lines.extend(table)
+        output_lines.append("\n</details>\n")
+
+    if not output_lines:
+        print(  # noqa: T201
+            f"No significant differences (over {threshold * 100:.1f}%) in cpu_time found."
+        )
+        return
+
+    header = ["## Benchmarks"]
+    markdown_output = "\n".join(header + output_lines)
+
+    # Print to terminal
+    print(markdown_output)  # noqa: T201
+
+    # Save to the path given by the caller (not to a module-level global).
+    # The report contains emoji, so the encoding is pinned to UTF-8.
+    report_file = pathlib.Path(output_path)
+    report_file.parent.mkdir(parents=True, exist_ok=True)
+    report_file.write_text(markdown_output + "\n", encoding="utf-8")
+
+    print(f"\n✅ Detailed Markdown saved to `{output_path}`")  # noqa: T201
+
+
+# Command-line entry point: expects exactly two result files as arguments.
+if __name__ == "__main__":
+    if len(sys.argv) != 3:
+        print(f"Usage: python {os.path.basename(__file__)} file1.json file2.json")  # noqa: T201
+        sys.exit(1)
+
+    file1 = sys.argv[1]
+    file2 = sys.argv[2]
+
+    # The report path is $BASE_OUTPUT_DIR/comparison.md, with `results` as the
+    # directory when the variable is unset.
+    output_file = str(
+        pathlib.Path(os.getenv("BASE_OUTPUT_DIR", "results"))
+        / pathlib.Path("comparison.md")
+    )
+    compare_benchmarks(
+        file1, file2, output_file, threshold=0.1
+    )  # increase threshold if it's too noisy
diff --git a/benchmarks/misc_benchmark.py b/benchmarks/misc_benchmark.py
@@ -0,0 +1,77 @@
+from __future__ import annotations
+
+import google_benchmark
+from util import Jagged, benchmark, format_benchmark_name
+
+import awkward as ak
+
+
+def _prepare_fun_benchmark(fun):
+    """Return one benchmark configuration per (array maker, length, dtype).
+
+    Each configuration is a dict holding the formatted benchmark ``name`` plus
+    the ``mkarr``, ``length``, ``dtype`` and ``fun`` values it was built from.
+    """
+    array_makers = [Jagged]
+    lengths = [1 << exponent for exponent in (12, 16, 20)]
+    dtypes = ["float64"]
+
+    configs = []
+    for mkarr in array_makers:
+        for length in lengths:
+            for dtype in dtypes:
+                label = format_benchmark_name(
+                    {
+                        "op_name": fun.__name__,
+                        "array": mkarr.__name__,
+                        "length": length,
+                        "dtype": dtype,
+                    }
+                )
+                configs.append(
+                    {
+                        "name": label,
+                        "mkarr": mkarr,
+                        "length": length,
+                        "dtype": dtype,
+                        "fun": fun,
+                    }
+                )
+    return configs
+
+
+def _general_fun_benchmark(state, **kwargs):
+    """Benchmark body shared by all functions listed in this module.
+
+    Reads ``mkarr``, ``length``, ``dtype`` and ``fun`` from ``kwargs``, builds
+    the input with ``mkarr(length, dtype)`` and calls ``fun`` on it for as
+    long as ``state`` evaluates as true.
+    """
+    make_array = kwargs["mkarr"]
+    n_entries = kwargs["length"]
+    dtype = kwargs["dtype"]
+    fun = kwargs["fun"]
+
+    # input array for the measurement (singly jagged, per the original note)
+    data = make_array(n_entries, dtype)
+
+    # ak.imag / ak.real get an input with an imaginary component
+    if fun in (ak.imag, ak.real):
+        data = data + 1j * data
+
+    # measured loop
+    while state:
+        fun(data)
+
+    # report throughput: `length` per iteration, flagged as a rate counter
+    rate = google_benchmark.Counter(
+        n_entries * state.iterations, google_benchmark.Counter.kIsRate
+    )
+    state.counters["elements_per_second"] = rate
+
+
+# Functions covered by this module; each one is called with the prepared
+# array as its only argument.
+# extend for more misc funs
+FUNS = [
+    ak.angle,
+    ak.drop_none,
+    ak.imag,
+    ak.is_none,
+    ak.is_valid,
+    ak.nan_to_none,
+    ak.nan_to_num,
+    ak.real,
+    ak.round,
+    ak.validity_error,
+]
+
+# register as benchmarks
+# The wrapper body never reads the loop variable `fun`: the function under
+# test is part of every configuration built by _prepare_fun_benchmark and
+# reaches _general_fun_benchmark as kwargs["fun"].
+# NOTE(review): presumably `benchmark` (from util) registers the wrapper once
+# per configuration under the configuration's "name" - confirm in util.
+for fun in FUNS:
+
+    @benchmark(_prepare_fun_benchmark(fun=fun))
+    def _(state, **kwargs):
+        _general_fun_benchmark(state, **kwargs)
+
+
+# Allow running this module's benchmarks on their own.
+if __name__ == "__main__":
+    google_benchmark.main()