From f0305a09c08749eb65bcd20d4b8ba74a23ccb479 Mon Sep 17 00:00:00 2001
From: pfackeldey <fackeldey.peter@gmail.com>
Date: Mon, 16 Jun 2025 13:20:58 -0400
Subject: [PATCH 1/9] try running benchmarks for awkward

---
 .github/workflows/benchmark.yml       | 123 +++++++++++++++++++++++
 benchmarks/README.md                  |  16 +++
 benchmarks/__init__.py                |   0
 benchmarks/benchmark.py               |   9 ++
 benchmarks/compare.py                 | 135 ++++++++++++++++++++++++++
 benchmarks/misc_benchmark.py          |  65 +++++++++++++
 benchmarks/reducer_benchmark.py       |  70 +++++++++++++
 benchmarks/requirements-benchmark.txt |   1 +
 benchmarks/run_action.sh              |  49 ++++++++++
 benchmarks/run_local.sh               |  19 ++++
 benchmarks/util.py                    |  51 ++++++++++
 11 files changed, 538 insertions(+)
 create mode 100644 .github/workflows/benchmark.yml
 create mode 100644 benchmarks/README.md
 create mode 100644 benchmarks/__init__.py
 create mode 100644 benchmarks/benchmark.py
 create mode 100644 benchmarks/compare.py
 create mode 100644 benchmarks/misc_benchmark.py
 create mode 100644 benchmarks/reducer_benchmark.py
 create mode 100644 benchmarks/requirements-benchmark.txt
 create mode 100755 benchmarks/run_action.sh
 create mode 100755 benchmarks/run_local.sh
 create mode 100644 benchmarks/util.py

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
new file mode 100644
index 0000000000..d7e8f65c78
--- /dev/null
+++ b/.github/workflows/benchmark.yml
@@ -0,0 +1,123 @@
+name: Benchmarks
+# Installs awkward and awkward-cpp from this checkout and runs the suite in benchmarks/.
+on:
+  pull_request:
+    paths-ignore: # changes limited to these paths do not trigger a run
+      - README.md
+      - CONTRIBUTING.md
+      - CITATION.cff
+      - LICENSE
+      - .readthedocs.yml
+      - docs-img/**
+      - docs/**
+      - awkward-cpp/docs/**
+      - studies/**
+
+  workflow_dispatch: # manual trigger
+
+concurrency:
+  group: "benchmark-${{ github.head_ref || github.run_id }}" # one group per PR branch (run id when there is no head ref)
+  cancel-in-progress: true # a newer run cancels the one still in progress for the same group
+
+jobs:
+  run-tests:
+    name: Run Benchmarks
+    strategy:
+      fail-fast: false
+      matrix: # currently a single configuration
+        runs-on:
+          - ubuntu-latest
+        python-version:
+          - "3.13"
+        python-architecture:
+          - x64
+
+    runs-on: ${{ matrix.runs-on }}
+
+    env:
+      PIP_ONLY_BINARY: numpy # pip must install numpy from a wheel, never build it
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+
+      - name: "Python ${{ matrix.python-version }}"
+        uses: actions/setup-python@v5
+        with:
+          python-version: "${{ matrix.python-version }}"
+          architecture: "${{ matrix.python-architecture }}"
+          allow-prereleases: true
+
+      - name: Generate build files
+        run: pipx run nox -s prepare -- --headers --signatures --tests
+      # The wheel in awkward-cpp/dist is cached; the key changes whenever a file under awkward-cpp/ changes.
+      - name: Cache awkward-cpp wheel
+        id: cache-awkward-cpp-wheel
+        uses: actions/cache@v4
+        with:
+          path: awkward-cpp/dist
+          key: ${{ github.job }}-${{ matrix.runs-on }}-${{ matrix.python-version }}-${{ matrix.python-architecture }}-${{ hashFiles('awkward-cpp/**') }}
+
+      - name: Build awkward-cpp wheel
+        if: steps.cache-awkward-cpp-wheel.outputs.cache-hit != 'true' # only build when no cached wheel was restored
+        run: |
+          python -m pip install build
+          python -m build -w awkward-cpp
+      # The matched wheel path(s) are exposed as `outputs.paths` and consumed by the install step below.
+      - name: Find built wheel
+        uses: tj-actions/glob@v22
+        id: find-wheel
+        with:
+          files: |
+            awkward-cpp/dist/*.whl
+
+      - name: Install awkward, awkward-cpp, and dependencies
+        run: python -m pip install -v . ${{ steps.find-wheel.outputs.paths }} pytest-github-actions-annotate-failures
+
+      - name: Setup Benchmark Env
+        run: python -m pip install -r benchmarks/requirements-benchmark.txt
+
+      - name: Print versions
+        run: python -m pip list
+
+      - name: Get PR target branch
+        id: get_target_branch
+        uses: actions/github-script@v7
+        with:
+          result-encoding: string
+          script: |
+            const { data: pullRequest } = await github.rest.pulls.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: context.issue.number, // this workflow defines no `inputs`
+            });
+            return pullRequest.base.ref;
+
+      - name: Run Benchmark and Comparisons
+        id: benchmark_and_compare
+        shell: bash
+        continue-on-error: true # failed benchmarking shouldn't stop the rest of the steps
+        run: |
+          cd benchmarks/
+          ./run_action.sh
+        env:
+          TARGET_BRANCH: ${{ steps.get_target_branch.outputs.result }} # usually: main
+          BRANCH_NAME: ${{ github.head_ref || github.ref_name }} # feature branch
+          BASE_OUTPUT_DIR: results_PR${{ github.event.pull_request.number }} # relative to benchmarks/
+
+      # compare.py only writes comparison.md when it found significant differences
+      - name: Comment on PR
+        uses: actions/github-script@v7
+        if: ${{ hashFiles(format('benchmarks/results_PR{0}/comparison.md', github.event.pull_request.number)) != '' }} # if there's no comparison.md, we won't post anything
+        with:
+          script: |
+            await github.rest.issues.createComment({
+              issue_number: context.issue.number,
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              body: require("fs").readFileSync("benchmarks/results_PR${{ github.event.pull_request.number }}/comparison.md", "utf8"),
+            });
+
+      - name: Cleanup Benchmark Outputs
+        run: rm -rf benchmarks/results_PR${{ github.event.pull_request.number }}
diff --git a/benchmarks/README.md b/benchmarks/README.md
new file mode 100644
index 0000000000..d08d1698cc
--- /dev/null
+++ b/benchmarks/README.md
@@ -0,0 +1,16 @@
+# Benchmarks
+
+Setup env:
+```
+pip install -r requirements-benchmark.txt
+```
+
+Run with:
+```shell
+./run_local.sh
+```
+
+If you have 2 benchmark results to compare:
+```shell
+python compare.py file1.json file2.json
+```
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py
new file mode 100644
index 0000000000..586932f284
--- /dev/null
+++ b/benchmarks/benchmark.py
@@ -0,0 +1,9 @@
+import google_benchmark
+
+# explicit imports to register all benchmarks
+import reducer_benchmark  # noqa
+import misc_benchmark  # noqa
+
+
+if __name__ == "__main__":
+    google_benchmark.main()  # entry point; the benchmarks were registered by the imports above
diff --git a/benchmarks/compare.py b/benchmarks/compare.py
new file mode 100644
index 0000000000..ffa588775e
--- /dev/null
+++ b/benchmarks/compare.py
@@ -0,0 +1,135 @@
+import json
+import sys
+import os
+import pathlib
+
+
+def load_json(filepath):  # returns the decoded JSON document stored in `filepath`
+    with open(filepath, mode="r") as handle:
+        return json.loads(handle.read())
+
+
+def relative_difference(val1, val2):  # |val1 - val2| relative to the smaller value; 0.0 if equal, inf if that value is 0
+    return 0.0 if val1 == val2 else (abs(val1 - val2) / min(val1, val2) if min(val1, val2) else float("inf"))
+
+
+def format_benchmark_name(name: str) -> str:
+    """Turn ``fn/array=A/length=N/dtype='T'/k=v`` into ``fn(A<N,T>, k=v)``.
+
+    Missing `array`/`length` show up as ``??``; a name that cannot be parsed
+    this way is returned unchanged.
+    """
+    short_dtypes = {
+        "float64": "f64",
+        "float32": "f32",
+        "int64": "i64",
+        "int32": "i32",
+    }
+    try:
+        base, *raw_params = name.split("/")
+        params = dict(raw.split("=", 1) for raw in raw_params)
+
+        array = params.pop("array", "??")
+        length = params.pop("length", "??")
+        dtype = params.pop("dtype", "").replace("'", "")
+        dtype = short_dtypes.get(dtype, dtype)
+
+        # whatever is left are extra call arguments, e.g. `axis=0`
+        extras = "".join(f", {key}={value}" for key, value in params.items())
+        return f"{base}({array}<{length},{dtype}>{extras})"
+    except Exception:
+        return name  # fallback
+
+
+def compare_benchmarks(
+    file1_path,
+    file2_path,
+    output_path,
+    threshold=0.1,
+):
+    """Write a Markdown report of the benchmarks whose `cpu_time` changed notably.
+
+    Args:
+        file1_path: JSON benchmark results used as the baseline.
+        file2_path: JSON benchmark results compared against the baseline.
+        output_path: file the Markdown report is written to.
+        threshold: relative `cpu_time` difference above which a benchmark is reported.
+
+    Only benchmarks present in both files are compared. If no difference
+    exceeds `threshold`, a note is printed and no file is written.
+
+    Raises:
+        AssertionError: if the two results of a reported benchmark use
+            different time units.
+    """
+    data1 = load_json(file1_path)
+    data2 = load_json(file2_path)
+
+    bm1 = {b["name"]: b for b in data1["benchmarks"]}
+    bm2 = {b["name"]: b for b in data2["benchmarks"]}
+
+    headers = ["Metric", os.path.basename(file1_path), os.path.basename(file2_path)]
+    eps = "elements_per_second"  # potential extra info
+    output_lines = []
+
+    for name, b1 in bm1.items():
+        if name not in bm2:
+            continue
+        b2 = bm2[name]
+
+        cpu1 = b1["cpu_time"]
+        cpu2 = b2["cpu_time"]
+        rel_diff = relative_difference(cpu1, cpu2)
+        if not rel_diff > threshold:
+            continue
+
+        time_unit = b1.get("time_unit", "")
+        assert time_unit == b2.get("time_unit", ""), "Can't compare different units"
+
+        # a smaller cpu_time in file2 counts as an improvement
+        direction = "🟢 **Improvement**" if cpu2 < cpu1 else "🔴 **Regression**"
+        output_lines += [
+            f"### 🔹 {format_benchmark_name(name)}\n",
+            f"**Relative CPU Time Difference:** `{rel_diff * 100:.1f}%` — {direction}\n",
+            # Collapsed detailed comparison
+            "<details><summary>Show full comparison</summary>\n\n",
+            "| " + " | ".join(headers) + " |",
+            "| --- | --- | --- |",
+            f"| `cpu_time` ({time_unit}) | {cpu1:.6e} | {cpu2:.6e} |",
+            f"| `real_time` ({time_unit}) | {b1['real_time']:.6e} | {b2['real_time']:.6e} |",
+        ]
+        if eps in b1 and eps in b2:
+            output_lines.append(f"| `elements/s` (Hz) | {b1[eps]:.2e} | {b2[eps]:.2e} |")
+        output_lines.append("\n</details>\n")
+
+    if not output_lines:
+        print(f"No significant differences (over {threshold * 100:.1f}%) in cpu_time found.")
+        return
+
+    markdown_output = "\n".join(["## Benchmarks", *output_lines])
+
+    # Print to terminal
+    print(markdown_output)
+
+    # Save to file. Use the `output_path` parameter (not a global of the calling
+    # script), create missing directories, and write UTF-8 since the report has emoji.
+    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
+    with open(output_path, "w", encoding="utf-8") as f:
+        f.write(markdown_output + "\n")
+
+    print(f"\n✅ Detailed Markdown saved to `{output_path}`")
+
+
+if __name__ == "__main__":
+    # exactly two positional arguments are expected: the JSON files to compare
+    if len(sys.argv) != 3:
+        print(f"Usage: python {os.path.basename(__file__)} file1.json file2.json")
+        sys.exit(1)
+
+    file1, file2 = sys.argv[1:]
+
+    # the report is written to $BASE_OUTPUT_DIR/comparison.md ("results" by default)
+    output_file = str(
+        pathlib.Path(os.getenv("BASE_OUTPUT_DIR", "results"), "comparison.md")
+    )
+    compare_benchmarks(file1, file2, output_file)
diff --git a/benchmarks/misc_benchmark.py b/benchmarks/misc_benchmark.py
new file mode 100644
index 0000000000..e038ea32ee
--- /dev/null
+++ b/benchmarks/misc_benchmark.py
@@ -0,0 +1,65 @@
+import awkward as ak
+import google_benchmark
+
+from util import benchmark, Jagged, Flat
+
+
+def _prepare_fun_benchmark(fun):
+    """Return one test-case dict per (array maker, length, dtype) combination for `fun`."""
+    lengths = [1 << exponent for exponent in (12, 16, 20)]
+    cases = []
+    for mkarr in (Jagged, Flat):
+        for length in lengths:
+            for dtype in ("float64",):
+                # `{length=}` / `{dtype=}` render as e.g. `length=4096` / `dtype='float64'`
+                name = f"ak.{fun.__name__}/array={mkarr.__name__}/{length=}/{dtype=}"
+                cases.append(
+                    {"name": name, "mkarr": mkarr, "length": length, "dtype": dtype, "fun": fun}
+                )
+    return cases
+
+
+def _general_fun_benchmark(state, **kwargs):
+    """Benchmark body: repeatedly call `fun(array)`.
+
+    The array is `mkarr(length, dtype)`; all values are read from `kwargs`.
+    """
+    mkarr, length, dtype, fun = (kwargs[key] for key in ("mkarr", "length", "dtype", "fun"))
+
+    # build the input array once, before the measured loop
+    ak_array = mkarr(length, dtype)
+
+    # run measurement
+    while state:
+        fun(ak_array)
+
+    # track how many elements per second are processed
+    rate = google_benchmark.Counter(length * state.iterations, google_benchmark.Counter.kIsRate)
+    state.counters["elements_per_second"] = rate
+
+
+# Functions to benchmark; each one is called with the array as its only argument.
+# Extend for more misc funs (some of them don't make much sense to benchmark I suppose...)
+FUNS = [
+    ak.angle,
+    ak.drop_none,
+    ak.imag,
+    ak.is_none,
+    ak.is_valid,
+    ak.nan_to_none,
+    ak.nan_to_num,
+    ak.real,
+    ak.round,
+    ak.validity_error,
+]
+
+# register as benchmarks: one benchmark per function and test case
+for fun in FUNS:
+
+    @benchmark(_prepare_fun_benchmark(fun=fun))
+    def _(state, **kwargs):  # `kwargs` is the test-case dict of the benchmark being run
+        _general_fun_benchmark(state, **kwargs)
+
+
+if __name__ == "__main__":
+    google_benchmark.main()  # entry point when this file is run as a script
diff --git a/benchmarks/reducer_benchmark.py b/benchmarks/reducer_benchmark.py
new file mode 100644
index 0000000000..d2b557fce6
--- /dev/null
+++ b/benchmarks/reducer_benchmark.py
@@ -0,0 +1,70 @@
+import awkward as ak
+import google_benchmark
+
+from util import benchmark, Jagged, Flat
+import time
+
+
+def _prepare_reducer_benchmark(reducer):
+    """Return one test-case dict per (array maker, length, dtype, axis) combination."""
+    lengths = [1 << exponent for exponent in (12, 16, 20)]
+    cases = []
+    for mkarr in (Jagged, Flat):
+        # axis=0 and axis=1 are only used with the jagged array maker
+        axes = [None, 0, 1] if mkarr is Jagged else [None]
+        for length in lengths:
+            for dtype in ("float64",):
+                for axis in axes:
+                    # `{length=}` etc. render as e.g. `length=4096`, `axis=None`
+                    name = f"ak.{reducer.__name__}/array={mkarr.__name__}/{length=}/{dtype=}/{axis=}"
+                    case = {"name": name, "mkarr": mkarr, "length": length, "dtype": dtype}
+                    cases.append({**case, "reducer": reducer, "axis": axis})
+    return cases
+
+
+def _general_reducer_benchmark(state, **kwargs):
+    """Benchmark body: repeatedly call `reducer(array, axis=axis)`.
+
+    The array is `mkarr(length, dtype)`; all values are read from `kwargs`.
+    """
+    mkarr, length, dtype = kwargs["mkarr"], kwargs["length"], kwargs["dtype"]
+    reducer, axis = kwargs["reducer"], kwargs["axis"]
+
+    # build the input array once, before the measured loop
+    ak_array = mkarr(length, dtype)
+
+    # run measurement: only the reducer call belongs in this loop; the former
+    # `time.sleep(0.1)` added 0.1 s of wall-clock time to every single iteration
+    while state:
+        reducer(ak_array, axis=axis)
+
+    # track how many elements per second are processed
+    rate = google_benchmark.Counter(length * state.iterations, google_benchmark.Counter.kIsRate)
+    state.counters["elements_per_second"] = rate
+
+
+# Reducers to benchmark; each is called as `reducer(array, axis=axis)`. Extend for more reducers.
+REDUCERS = [
+    ak.all,
+    ak.any,
+    ak.argmax,
+    ak.argmin,
+    ak.max,
+    ak.mean,
+    ak.min,
+    ak.prod,
+    ak.std,
+    ak.sum,
+    ak.var,
+]
+
+# register as benchmarks: one benchmark per reducer and test case
+for reducer in REDUCERS:
+
+    @benchmark(_prepare_reducer_benchmark(reducer=reducer))
+    def _(state, **kwargs):  # `kwargs` is the test-case dict of the benchmark being run
+        _general_reducer_benchmark(state, **kwargs)
+
+
+if __name__ == "__main__":
+    google_benchmark.main()  # entry point when this file is run as a script
diff --git a/benchmarks/requirements-benchmark.txt b/benchmarks/requirements-benchmark.txt
new file mode 100644
index 0000000000..1c47cea3a0
--- /dev/null
+++ b/benchmarks/requirements-benchmark.txt
@@ -0,0 +1 @@
+google-benchmark
diff --git a/benchmarks/run_action.sh b/benchmarks/run_action.sh
new file mode 100755
index 0000000000..07340addbc
--- /dev/null
+++ b/benchmarks/run_action.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+
+action() {
+    # Benchmark HEAD@PR (target branch merged in) and the target branch itself,
+    # then compare both. Needs BASE_OUTPUT_DIR, BRANCH_NAME and TARGET_BRANCH.
+    # NOTE(review): both runs import the *installed* awkward; switching branches
+    # only changes what is measured for an editable install -- confirm.
+    local bm_script="benchmark.py" # must be set before the output paths are built from it
+
+    # setup output dir
+    local current_git_hash results_dir
+    current_git_hash=$(git rev-parse --verify HEAD) || return 1
+    results_dir="${BASE_OUTPUT_DIR}/${BRANCH_NAME}__${current_git_hash}"
+    local output_path_feature="${results_dir}/${bm_script}.json"
+
+    # Temporarily merge the target branch
+    git checkout -b pr_branch
+    git fetch --unshallow || echo "" # It might be worth switching actions/checkout to use depth 0 later on
+    git config user.email "gha@example.com" && git config user.name "GHA" # For some reason this is needed even though nothing is being committed
+    if ! git merge --no-commit --no-ff "origin/${TARGET_BRANCH}"; then
+        printf '***\nError: There are merge conflicts that need to be resolved.\n***\n' >&2
+        return 1
+    fi
+
+    # This is for the HEAD@PR (including main merged)
+    mkdir -p "$results_dir"
+    python "$bm_script" \
+        --benchmark_time_unit=ms \
+        --benchmark_out="${output_path_feature}" \
+        --benchmark_out_format=json || return 1
+
+    # This is for HEAD@main (usually main, not necessarily though)
+    git stash
+    git checkout "origin/${TARGET_BRANCH}" || return 1
+
+    current_git_hash=$(git rev-parse --verify HEAD) || return 1
+    results_dir="${BASE_OUTPUT_DIR}/${TARGET_BRANCH}__${current_git_hash}"
+    local output_path_target="${results_dir}/${bm_script}.json"
+
+    mkdir -p "$results_dir"
+    python "$bm_script" \
+        --benchmark_time_unit=ms \
+        --benchmark_out="${output_path_target}" \
+        --benchmark_out_format=json || return 1
+
+    # Compare both: the baseline (target) comes first, then the feature results
+    python compare.py "${output_path_target}" "${output_path_feature}"
+}
+action "$@"
diff --git a/benchmarks/run_local.sh b/benchmarks/run_local.sh
new file mode 100755
index 0000000000..f797094f15
--- /dev/null
+++ b/benchmarks/run_local.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+
+action() {
+    # Run the benchmark suite and store the JSON report under results/<git hash>/.
+    local current_git_hash
+    current_git_hash=$(git rev-parse --verify HEAD) || return 1 # stop when not inside a git checkout
+    local results_dir="results/${current_git_hash}"
+    local bm_script="benchmark.py"
+
+    # create
+    mkdir -p "$results_dir"
+
+    python "$bm_script" \
+        --benchmark_time_unit=ms \
+        --benchmark_color=true \
+        --benchmark_out="$results_dir/$bm_script.json" \
+        --benchmark_out_format=json
+}
+action "$@"
diff --git a/benchmarks/util.py b/benchmarks/util.py
new file mode 100644
index 0000000000..963b5e612d
--- /dev/null
+++ b/benchmarks/util.py
@@ -0,0 +1,51 @@
+import functools
+import math
+import awkward as ak
+import numpy as np
+import google_benchmark
+
+
+def rng():
+    """Return a new random generator seeded with 42 on every call (reproducible rng)."""
+    return np.random.default_rng(42)
+
+
+def benchmark(*args):
+    """Decorator factory: register `func` with google_benchmark once per test-case dict in ``args[0]``."""
+    def decorator(func):
+        wrapper = func  # returned unchanged when ``args[0]`` is empty (used to be an UnboundLocalError)
+        for test_case in args[0]:
+            # the default argument freezes the current dict (closures bind late)
+            @google_benchmark.register(name=test_case["name"])
+            @functools.wraps(func)
+            def wrapper(state, test_case=test_case):
+                return func(state, **test_case)
+        return wrapper
+    return decorator
+
+
+def _generate_counts(sum_upto: int, how_many: int) -> np.ndarray:
+    """Split `sum_upto` into `how_many` random non-negative integer counts."""
+    weights = rng().dirichlet(np.full(how_many, 0.3))
+    counts = rng().multinomial(sum_upto, weights).astype("int")
+    assert np.sum(counts) == sum_upto
+    return counts
+
+
+def Jagged(length, dtype):
+    """Create a singly jagged array holding `length` random values in total."""
+    values = rng().random(length, dtype)
+
+    # seems like a reasonable heuristic: the number of sublists grows
+    # roughly like 10 * sqrt(length)
+    exponent = int(math.log(length) / math.log(2))
+    n_sublists = 10 * (1 << (exponent // 2))
+
+    assert n_sublists < length
+
+    return ak.unflatten(values, _generate_counts(length, n_sublists))
+
+
+def Flat(length, dtype):
+    """Create a flat (non-jagged) array of `length` random values of type `dtype`."""
+    return ak.Array(rng().random(size=length, dtype=dtype))

From 5ce99ec7171f96492d0ca9cd79ac978c8af09aac Mon Sep 17 00:00:00 2001
From: pfackeldey <fackeldey.peter@gmail.com>
Date: Mon, 16 Jun 2025 13:20:58 -0400
Subject: [PATCH 2/9] try running benchmarks for awkward

---
 .github/workflows/benchmark.yml       | 123 +++++++++++++++++++++++
 benchmarks/README.md                  |  16 +++
 benchmarks/__init__.py                |   0
 benchmarks/benchmark.py               |   9 ++
 benchmarks/compare.py                 | 135 ++++++++++++++++++++++++++
 benchmarks/misc_benchmark.py          |  65 +++++++++++++
 benchmarks/reducer_benchmark.py       |  68 +++++++++++++
 benchmarks/requirements-benchmark.txt |   1 +
 benchmarks/run_action.sh              |  49 ++++++++++
 benchmarks/run_local.sh               |  19 ++++
 benchmarks/util.py                    |  51 ++++++++++
 11 files changed, 536 insertions(+)
 create mode 100644 .github/workflows/benchmark.yml
 create mode 100644 benchmarks/README.md
 create mode 100644 benchmarks/__init__.py
 create mode 100644 benchmarks/benchmark.py
 create mode 100644 benchmarks/compare.py
 create mode 100644 benchmarks/misc_benchmark.py
 create mode 100644 benchmarks/reducer_benchmark.py
 create mode 100644 benchmarks/requirements-benchmark.txt
 create mode 100755 benchmarks/run_action.sh
 create mode 100755 benchmarks/run_local.sh
 create mode 100644 benchmarks/util.py

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
new file mode 100644
index 0000000000..d7e8f65c78
--- /dev/null
+++ b/.github/workflows/benchmark.yml
@@ -0,0 +1,123 @@
+name: Benchmarks
+# Installs awkward and awkward-cpp from this checkout and runs the suite in benchmarks/.
+on:
+  pull_request:
+    paths-ignore: # changes limited to these paths do not trigger a run
+      - README.md
+      - CONTRIBUTING.md
+      - CITATION.cff
+      - LICENSE
+      - .readthedocs.yml
+      - docs-img/**
+      - docs/**
+      - awkward-cpp/docs/**
+      - studies/**
+
+  workflow_dispatch: # manual trigger
+
+concurrency:
+  group: "benchmark-${{ github.head_ref || github.run_id }}" # one group per PR branch (run id when there is no head ref)
+  cancel-in-progress: true # a newer run cancels the one still in progress for the same group
+
+jobs:
+  run-tests:
+    name: Run Benchmarks
+    strategy:
+      fail-fast: false
+      matrix: # currently a single configuration
+        runs-on:
+          - ubuntu-latest
+        python-version:
+          - "3.13"
+        python-architecture:
+          - x64
+
+    runs-on: ${{ matrix.runs-on }}
+
+    env:
+      PIP_ONLY_BINARY: numpy # pip must install numpy from a wheel, never build it
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+
+      - name: "Python ${{ matrix.python-version }}"
+        uses: actions/setup-python@v5
+        with:
+          python-version: "${{ matrix.python-version }}"
+          architecture: "${{ matrix.python-architecture }}"
+          allow-prereleases: true
+
+      - name: Generate build files
+        run: pipx run nox -s prepare -- --headers --signatures --tests
+      # The wheel in awkward-cpp/dist is cached; the key changes whenever a file under awkward-cpp/ changes.
+      - name: Cache awkward-cpp wheel
+        id: cache-awkward-cpp-wheel
+        uses: actions/cache@v4
+        with:
+          path: awkward-cpp/dist
+          key: ${{ github.job }}-${{ matrix.runs-on }}-${{ matrix.python-version }}-${{ matrix.python-architecture }}-${{ hashFiles('awkward-cpp/**') }}
+
+      - name: Build awkward-cpp wheel
+        if: steps.cache-awkward-cpp-wheel.outputs.cache-hit != 'true' # only build when no cached wheel was restored
+        run: |
+          python -m pip install build
+          python -m build -w awkward-cpp
+      # The matched wheel path(s) are exposed as `outputs.paths` and consumed by the install step below.
+      - name: Find built wheel
+        uses: tj-actions/glob@v22
+        id: find-wheel
+        with:
+          files: |
+            awkward-cpp/dist/*.whl
+
+      - name: Install awkward, awkward-cpp, and dependencies
+        run: python -m pip install -v . ${{ steps.find-wheel.outputs.paths }} pytest-github-actions-annotate-failures
+
+      - name: Setup Benchmark Env
+        run: python -m pip install -r benchmarks/requirements-benchmark.txt
+
+      - name: Print versions
+        run: python -m pip list
+
+      - name: Get PR target branch
+        id: get_target_branch
+        uses: actions/github-script@v7
+        with:
+          result-encoding: string
+          script: |
+            const { data: pullRequest } = await github.rest.pulls.get({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              pull_number: context.issue.number, // this workflow defines no `inputs`
+            });
+            return pullRequest.base.ref;
+
+      - name: Run Benchmark and Comparisons
+        id: benchmark_and_compare
+        shell: bash
+        continue-on-error: true # failed benchmarking shouldn't stop the rest of the steps
+        run: |
+          cd benchmarks/
+          ./run_action.sh
+        env:
+          TARGET_BRANCH: ${{ steps.get_target_branch.outputs.result }} # usually: main
+          BRANCH_NAME: ${{ github.head_ref || github.ref_name }} # feature branch
+          BASE_OUTPUT_DIR: results_PR${{ github.event.pull_request.number }} # relative to benchmarks/
+
+      # compare.py only writes comparison.md when it found significant differences
+      - name: Comment on PR
+        uses: actions/github-script@v7
+        if: ${{ hashFiles(format('benchmarks/results_PR{0}/comparison.md', github.event.pull_request.number)) != '' }} # if there's no comparison.md, we won't post anything
+        with:
+          script: |
+            await github.rest.issues.createComment({
+              issue_number: context.issue.number,
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              body: require("fs").readFileSync("benchmarks/results_PR${{ github.event.pull_request.number }}/comparison.md", "utf8"),
+            });
+
+      - name: Cleanup Benchmark Outputs
+        run: rm -rf benchmarks/results_PR${{ github.event.pull_request.number }}
diff --git a/benchmarks/README.md b/benchmarks/README.md
new file mode 100644
index 0000000000..d08d1698cc
--- /dev/null
+++ b/benchmarks/README.md
@@ -0,0 +1,16 @@
+# Benchmarks
+
+Setup env:
+```
+pip install -r requirements-benchmark.txt
+```
+
+Run with:
+```shell
+./run_local.sh
+```
+
+If you have 2 benchmark results to compare:
+```shell
+python compare.py file1.json file2.json
+```
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py
new file mode 100644
index 0000000000..586932f284
--- /dev/null
+++ b/benchmarks/benchmark.py
@@ -0,0 +1,9 @@
+import google_benchmark
+
+# explicit imports to register all benchmarks
+import reducer_benchmark  # noqa
+import misc_benchmark  # noqa
+
+
+if __name__ == "__main__":
+    google_benchmark.main()  # entry point; the benchmarks were registered by the imports above
diff --git a/benchmarks/compare.py b/benchmarks/compare.py
new file mode 100644
index 0000000000..ffa588775e
--- /dev/null
+++ b/benchmarks/compare.py
@@ -0,0 +1,135 @@
+import json
+import sys
+import os
+import pathlib
+
+
+def load_json(filepath):  # returns the decoded JSON document stored in `filepath`
+    with open(filepath, mode="r") as handle:
+        return json.loads(handle.read())
+
+
+def relative_difference(val1, val2):  # |val1 - val2| relative to the smaller value; 0.0 if equal, inf if that value is 0
+    return 0.0 if val1 == val2 else (abs(val1 - val2) / min(val1, val2) if min(val1, val2) else float("inf"))
+
+
+def format_benchmark_name(name: str) -> str:
+    """Turn ``fn/array=A/length=N/dtype='T'/k=v`` into ``fn(A<N,T>, k=v)``.
+
+    Missing `array`/`length` show up as ``??``; a name that cannot be parsed
+    this way is returned unchanged.
+    """
+    short_dtypes = {
+        "float64": "f64",
+        "float32": "f32",
+        "int64": "i64",
+        "int32": "i32",
+    }
+    try:
+        base, *raw_params = name.split("/")
+        params = dict(raw.split("=", 1) for raw in raw_params)
+
+        array = params.pop("array", "??")
+        length = params.pop("length", "??")
+        dtype = params.pop("dtype", "").replace("'", "")
+        dtype = short_dtypes.get(dtype, dtype)
+
+        # whatever is left are extra call arguments, e.g. `axis=0`
+        extras = "".join(f", {key}={value}" for key, value in params.items())
+        return f"{base}({array}<{length},{dtype}>{extras})"
+    except Exception:
+        return name  # fallback
+
+
+def compare_benchmarks(
+    file1_path,
+    file2_path,
+    output_path,
+    threshold=0.1,
+):
+    """Write a Markdown report of the benchmarks whose `cpu_time` changed notably.
+
+    Args:
+        file1_path: JSON benchmark results used as the baseline.
+        file2_path: JSON benchmark results compared against the baseline.
+        output_path: file the Markdown report is written to.
+        threshold: relative `cpu_time` difference above which a benchmark is reported.
+
+    Only benchmarks present in both files are compared. If no difference
+    exceeds `threshold`, a note is printed and no file is written.
+
+    Raises:
+        AssertionError: if the two results of a reported benchmark use
+            different time units.
+    """
+    data1 = load_json(file1_path)
+    data2 = load_json(file2_path)
+
+    bm1 = {b["name"]: b for b in data1["benchmarks"]}
+    bm2 = {b["name"]: b for b in data2["benchmarks"]}
+
+    headers = ["Metric", os.path.basename(file1_path), os.path.basename(file2_path)]
+    eps = "elements_per_second"  # potential extra info
+    output_lines = []
+
+    for name, b1 in bm1.items():
+        if name not in bm2:
+            continue
+        b2 = bm2[name]
+
+        cpu1 = b1["cpu_time"]
+        cpu2 = b2["cpu_time"]
+        rel_diff = relative_difference(cpu1, cpu2)
+        if not rel_diff > threshold:
+            continue
+
+        time_unit = b1.get("time_unit", "")
+        assert time_unit == b2.get("time_unit", ""), "Can't compare different units"
+
+        # a smaller cpu_time in file2 counts as an improvement
+        direction = "🟢 **Improvement**" if cpu2 < cpu1 else "🔴 **Regression**"
+        output_lines += [
+            f"### 🔹 {format_benchmark_name(name)}\n",
+            f"**Relative CPU Time Difference:** `{rel_diff * 100:.1f}%` — {direction}\n",
+            # Collapsed detailed comparison
+            "<details><summary>Show full comparison</summary>\n\n",
+            "| " + " | ".join(headers) + " |",
+            "| --- | --- | --- |",
+            f"| `cpu_time` ({time_unit}) | {cpu1:.6e} | {cpu2:.6e} |",
+            f"| `real_time` ({time_unit}) | {b1['real_time']:.6e} | {b2['real_time']:.6e} |",
+        ]
+        if eps in b1 and eps in b2:
+            output_lines.append(f"| `elements/s` (Hz) | {b1[eps]:.2e} | {b2[eps]:.2e} |")
+        output_lines.append("\n</details>\n")
+
+    if not output_lines:
+        print(f"No significant differences (over {threshold * 100:.1f}%) in cpu_time found.")
+        return
+
+    markdown_output = "\n".join(["## Benchmarks", *output_lines])
+
+    # Print to terminal
+    print(markdown_output)
+
+    # Save to file. Use the `output_path` parameter (not a global of the calling
+    # script), create missing directories, and write UTF-8 since the report has emoji.
+    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
+    with open(output_path, "w", encoding="utf-8") as f:
+        f.write(markdown_output + "\n")
+
+    print(f"\n✅ Detailed Markdown saved to `{output_path}`")
+
+
+if __name__ == "__main__":
+    # exactly two positional arguments are expected: the JSON files to compare
+    if len(sys.argv) != 3:
+        print(f"Usage: python {os.path.basename(__file__)} file1.json file2.json")
+        sys.exit(1)
+
+    file1, file2 = sys.argv[1:]
+
+    # the report is written to $BASE_OUTPUT_DIR/comparison.md ("results" by default)
+    output_file = str(
+        pathlib.Path(os.getenv("BASE_OUTPUT_DIR", "results"), "comparison.md")
+    )
+    compare_benchmarks(file1, file2, output_file)
diff --git a/benchmarks/misc_benchmark.py b/benchmarks/misc_benchmark.py
new file mode 100644
index 0000000000..e038ea32ee
--- /dev/null
+++ b/benchmarks/misc_benchmark.py
@@ -0,0 +1,65 @@
+import awkward as ak
+import google_benchmark
+
+from util import benchmark, Jagged, Flat
+
+
+def _prepare_fun_benchmark(fun):
+    """Return one test-case dict per (array maker, length, dtype) combination for `fun`."""
+    lengths = [1 << exponent for exponent in (12, 16, 20)]
+    cases = []
+    for mkarr in (Jagged, Flat):
+        for length in lengths:
+            for dtype in ("float64",):
+                # `{length=}` / `{dtype=}` render as e.g. `length=4096` / `dtype='float64'`
+                name = f"ak.{fun.__name__}/array={mkarr.__name__}/{length=}/{dtype=}"
+                cases.append(
+                    {"name": name, "mkarr": mkarr, "length": length, "dtype": dtype, "fun": fun}
+                )
+    return cases
+
+
+def _general_fun_benchmark(state, **kwargs):
+    """Benchmark body: repeatedly call `fun(array)`.
+
+    The array is `mkarr(length, dtype)`; all values are read from `kwargs`.
+    """
+    mkarr, length, dtype, fun = (kwargs[key] for key in ("mkarr", "length", "dtype", "fun"))
+
+    # build the input array once, before the measured loop
+    ak_array = mkarr(length, dtype)
+
+    # run measurement
+    while state:
+        fun(ak_array)
+
+    # track how many elements per second are processed
+    rate = google_benchmark.Counter(length * state.iterations, google_benchmark.Counter.kIsRate)
+    state.counters["elements_per_second"] = rate
+
+
+# Functions to benchmark; each one is called with the array as its only argument.
+# Extend for more misc funs (some of them don't make much sense to benchmark I suppose...)
+FUNS = [
+    ak.angle,
+    ak.drop_none,
+    ak.imag,
+    ak.is_none,
+    ak.is_valid,
+    ak.nan_to_none,
+    ak.nan_to_num,
+    ak.real,
+    ak.round,
+    ak.validity_error,
+]
+
+# register as benchmarks: one benchmark per function and test case
+for fun in FUNS:
+
+    @benchmark(_prepare_fun_benchmark(fun=fun))
+    def _(state, **kwargs):  # `kwargs` is the test-case dict of the benchmark being run
+        _general_fun_benchmark(state, **kwargs)
+
+
+if __name__ == "__main__":
+    google_benchmark.main()  # entry point when this file is run as a script
diff --git a/benchmarks/reducer_benchmark.py b/benchmarks/reducer_benchmark.py
new file mode 100644
index 0000000000..eabd534afd
--- /dev/null
+++ b/benchmarks/reducer_benchmark.py
@@ -0,0 +1,68 @@
+import awkward as ak
+import google_benchmark
+
+from util import benchmark, Jagged, Flat
+
+
+def _prepare_reducer_benchmark(reducer):
+    return [
+        {
+            "name": f"ak.{reducer.__name__}/array={mkarr.__name__}/{length=}/{dtype=}/{axis=}",
+            "mkarr": mkarr,
+            "length": length,
+            "dtype": dtype,
+            "reducer": reducer,
+            "axis": axis,
+        }
+        for mkarr in (Jagged, Flat)
+        for length in [1 << i for i in (12, 16, 20)]
+        for dtype in ["float64"]
+        for axis in ([None, 0, 1] if mkarr is Jagged else [None])
+    ]
+
+
+def _general_reducer_benchmark(state, **kwargs):
+    mkarr = kwargs["mkarr"]
+    length = kwargs["length"]
+    dtype = kwargs["dtype"]
+    reducer = kwargs["reducer"]
+    axis = kwargs["axis"]
+
+    # create singely jagged awkward array
+    ak_array = mkarr(length, dtype)
+
+    # run measurement
+    while state:
+        reducer(ak_array, axis=axis)
+
+    # track how many elements per second are processed
+    state.counters["elements_per_second"] = google_benchmark.Counter(
+        length * state.iterations, google_benchmark.Counter.kIsRate
+    )
+
+
+# extend for more reducers
+REDUCERS = [
+    ak.all,
+    ak.any,
+    ak.argmax,
+    ak.argmin,
+    ak.max,
+    ak.mean,
+    ak.min,
+    ak.prod,
+    ak.std,
+    ak.sum,
+    ak.var,
+]
+
+# register as benchmarks
+for reducer in REDUCERS:
+
+    @benchmark(_prepare_reducer_benchmark(reducer=reducer))
+    def _(state, **kwargs):
+        _general_reducer_benchmark(state, **kwargs)
+
+
+if __name__ == "__main__":
+    google_benchmark.main()
diff --git a/benchmarks/requirements-benchmark.txt b/benchmarks/requirements-benchmark.txt
new file mode 100644
index 0000000000..1c47cea3a0
--- /dev/null
+++ b/benchmarks/requirements-benchmark.txt
@@ -0,0 +1 @@
+google-benchmark
diff --git a/benchmarks/run_action.sh b/benchmarks/run_action.sh
new file mode 100755
index 0000000000..07340addbc
--- /dev/null
+++ b/benchmarks/run_action.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+
+action() {
+    # This is for the HEAD@PR (including main merged)
+
+    # setup output dir
+    local current_git_hash=$(git rev-parse --verify HEAD)
+    local results_dir=${BASE_OUTPUT_DIR}/${BRANCH_NAME}__${current_git_hash}
+    local output_path_feature=${results_dir}/${bm_script}.json
+
+    # Temporarily merge the target branch
+    git checkout -b pr_branch
+    git fetch --unshallow || echo "" # It might be worth switching actions/checkout to use depth 0 later on
+    git config user.email "gha@example.com" && git config user.name "GHA" # For some reason this is needed even though nothing is being committed
+    git merge --no-commit --no-ff origin/${TARGET_BRANCH} || (echo "***\nError: There are merge conflicts that need to be resolved.\n***" && false)
+
+    # create
+    mkdir -p $results_dir
+
+    local bm_script="benchmark.py"
+
+    python $bm_script \
+        --benchmark_time_unit=ms \
+        --benchmark_out=${output_path_feature} \
+        --benchmark_out_format=json
+
+
+    # This is for HEAD@main (usually main, not necessarily though)
+    git stash
+    git checkout origin/${TARGET_BRANCH}
+
+    local current_git_hash=$(git rev-parse --verify HEAD)
+    local results_dir=${BASE_OUTPUT_DIR}/${TARGET_BRANCH}__${current_git_hash}
+    local output_path_target=${results_dir}/${bm_script}.json
+
+    # create
+    mkdir -p $results_dir
+
+    local bm_script="benchmark.py"
+
+    python $bm_script \
+        --benchmark_time_unit=ms \
+        --benchmark_out=${output_path_target} \
+        --benchmark_out_format=json
+
+    # Compare both
+    python compare.py ${output_path_target} ${output_path_feature}
+}
+action "$@"
diff --git a/benchmarks/run_local.sh b/benchmarks/run_local.sh
new file mode 100755
index 0000000000..f797094f15
--- /dev/null
+++ b/benchmarks/run_local.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+
+action() {
+    # setup output dir
+    local current_git_hash=$(git rev-parse --verify HEAD)
+    local results_dir=results/${current_git_hash}
+
+    # create
+    mkdir -p $results_dir
+
+    local bm_script="benchmark.py"
+
+    python $bm_script \
+        --benchmark_time_unit=ms \
+        --benchmark_color=true \
+        --benchmark_out=$results_dir/$bm_script.json \
+        --benchmark_out_format=json
+}
+action "$@"
diff --git a/benchmarks/util.py b/benchmarks/util.py
new file mode 100644
index 0000000000..963b5e612d
--- /dev/null
+++ b/benchmarks/util.py
@@ -0,0 +1,51 @@
+import functools
+import math
+import awkward as ak
+import numpy as np
+import google_benchmark
+
+
+# reproducible rng
+def rng():
+    return np.random.default_rng(seed=42)
+
+
+def benchmark(*args):
+    def decorator(func):
+        for test_case in args[0]:
+
+            @google_benchmark.register(name=test_case["name"])
+            @functools.wraps(func)
+            def wrapper(state, test_case=test_case):
+                return func(state, **test_case)
+
+        return wrapper
+
+    return decorator
+
+
+def _generate_counts(sum_upto: int, how_many: int) -> np.ndarray:
+    counts = rng().multinomial(
+        sum_upto, rng().dirichlet(np.ones(how_many) * 0.3)
+    ).astype("int")
+    assert np.sum(counts) == sum_upto
+    return counts
+
+
+def Jagged(length, dtype):
+    """creates a singely jagged array"""
+    flat_content = rng().random(length, dtype)
+
+    # seems like a reasonable heuristic
+    powof2 = int(math.log(length) / math.log(2))
+    how_many = (1 << (powof2 // 2)) * 10
+
+    assert how_many < length
+
+    counts = _generate_counts(length, how_many)
+    return ak.unflatten(flat_content, counts)
+
+
+def Flat(length, dtype):
+    """creates a flat array"""
+    return ak.Array(rng().random(length, dtype))

From 0c6e14f4ce1c29b783407ee76c856909dc95618f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 16 Jun 2025 17:34:01 +0000
Subject: [PATCH 3/9] style: pre-commit fixes

---
 benchmarks/benchmark.py         |  5 +++--
 benchmarks/compare.py           | 14 +++++++++-----
 benchmarks/misc_benchmark.py    |  6 ++++--
 benchmarks/reducer_benchmark.py |  6 ++++--
 benchmarks/util.py              | 16 +++++++++++-----
 5 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py
index 586932f284..efaba33490 100644
--- a/benchmarks/benchmark.py
+++ b/benchmarks/benchmark.py
@@ -1,9 +1,10 @@
+from __future__ import annotations
+
 import google_benchmark
+import misc_benchmark  # noqa
 
 # explicit imports to register all benchmarks
 import reducer_benchmark  # noqa
-import misc_benchmark  # noqa
-
 
 if __name__ == "__main__":
     google_benchmark.main()
diff --git a/benchmarks/compare.py b/benchmarks/compare.py
index ffa588775e..38532b87b4 100644
--- a/benchmarks/compare.py
+++ b/benchmarks/compare.py
@@ -1,11 +1,13 @@
+from __future__ import annotations
+
 import json
-import sys
 import os
 import pathlib
+import sys
 
 
 def load_json(filepath):
-    with open(filepath, "r") as f:
+    with open(filepath) as f:
         return json.load(f)
 
 
@@ -32,7 +34,7 @@ def format_benchmark_name(name: str) -> str:
         pretty_name = f"{base}({array}<{length},{dtype_short}>"
 
         # any extra parameters to the function, e.g. `axis=0`
-        for k,v in params.items():
+        for k, v in params.items():
             pretty_name += f", {k}={v}"
 
         pretty_name += ")"
@@ -96,7 +98,7 @@ def compare_benchmarks(
                     f"| `cpu_time` ({time_unit}) | {cpu1:.6e} | {cpu2:.6e} |",
                     f"| `real_time` ({time_unit}) | {b1['real_time']:.6e} | {b2['real_time']:.6e} |",
                 ]
-                eps = "elements_per_second" # potential extra info
+                eps = "elements_per_second"  # potential extra info
                 if eps in b1 and eps in b2:
                     table += [f"| `elements/s` (Hz) | {b1[eps]:.2e} | {b2[eps]:.2e} |"]
 
@@ -104,7 +106,9 @@ def compare_benchmarks(
                 output_lines.append("\n</details>\n")
 
     if not found_diffs:
-        print(f"No significant differences (over {threshold * 100:.1f}%) in cpu_time found.")
+        print(
+            f"No significant differences (over {threshold * 100:.1f}%) in cpu_time found."
+        )
         return
 
     header = ["## Benchmarks"]
diff --git a/benchmarks/misc_benchmark.py b/benchmarks/misc_benchmark.py
index e038ea32ee..07c75d54c4 100644
--- a/benchmarks/misc_benchmark.py
+++ b/benchmarks/misc_benchmark.py
@@ -1,7 +1,9 @@
-import awkward as ak
+from __future__ import annotations
+
 import google_benchmark
+from util import Flat, Jagged, benchmark
 
-from util import benchmark, Jagged, Flat
+import awkward as ak
 
 
 def _prepare_fun_benchmark(fun):
diff --git a/benchmarks/reducer_benchmark.py b/benchmarks/reducer_benchmark.py
index eabd534afd..86b20dbc9e 100644
--- a/benchmarks/reducer_benchmark.py
+++ b/benchmarks/reducer_benchmark.py
@@ -1,7 +1,9 @@
-import awkward as ak
+from __future__ import annotations
+
 import google_benchmark
+from util import Flat, Jagged, benchmark
 
-from util import benchmark, Jagged, Flat
+import awkward as ak
 
 
 def _prepare_reducer_benchmark(reducer):
diff --git a/benchmarks/util.py b/benchmarks/util.py
index 963b5e612d..fb1cfb2f94 100644
--- a/benchmarks/util.py
+++ b/benchmarks/util.py
@@ -1,8 +1,12 @@
+from __future__ import annotations
+
 import functools
 import math
-import awkward as ak
-import numpy as np
+
 import google_benchmark
+import numpy as np
+
+import awkward as ak
 
 
 # reproducible rng
@@ -25,9 +29,11 @@ def wrapper(state, test_case=test_case):
 
 
 def _generate_counts(sum_upto: int, how_many: int) -> np.ndarray:
-    counts = rng().multinomial(
-        sum_upto, rng().dirichlet(np.ones(how_many) * 0.3)
-    ).astype("int")
+    counts = (
+        rng()
+        .multinomial(sum_upto, rng().dirichlet(np.ones(how_many) * 0.3))
+        .astype("int")
+    )
     assert np.sum(counts) == sum_upto
     return counts
 

From fdc73ae21c710589bb2265288adfd7e464871d9f Mon Sep 17 00:00:00 2001
From: pfackeldey <fackeldey.peter@gmail.com>
Date: Mon, 16 Jun 2025 13:48:48 -0400
Subject: [PATCH 4/9] satisfy precommit

---
 benchmarks/benchmark.py  |  4 ++--
 benchmarks/compare.py    | 12 ++++++------
 benchmarks/run_action.sh | 14 +++++++-------
 benchmarks/run_local.sh  |  4 ++--
 benchmarks/util.py       |  2 +-
 5 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py
index efaba33490..943e37b5bf 100644
--- a/benchmarks/benchmark.py
+++ b/benchmarks/benchmark.py
@@ -1,10 +1,10 @@
 from __future__ import annotations
 
 import google_benchmark
-import misc_benchmark  # noqa
 
 # explicit imports to register all benchmarks
-import reducer_benchmark  # noqa
+import misc_benchmark  # noqa: F401
+import reducer_benchmark  # noqa: F401
 
 if __name__ == "__main__":
     google_benchmark.main()
diff --git a/benchmarks/compare.py b/benchmarks/compare.py
index 38532b87b4..0998bf042a 100644
--- a/benchmarks/compare.py
+++ b/benchmarks/compare.py
@@ -19,7 +19,7 @@ def format_benchmark_name(name: str) -> str:
     try:
         parts = name.split("/")
         base = parts[0]
-        params = {k: v for k, v in (part.split("=", 1) for part in parts[1:])}
+        params = dict(part.split("=", 1) for part in parts[1:])
 
         array = params.pop("array", "??")
         length = params.pop("length", "??")
@@ -62,7 +62,7 @@ def compare_benchmarks(
 
     found_diffs = False
 
-    for name in bm1:
+    for name in bm1:  # noqa: PLC0206
         if name in bm2:
             b1 = bm1[name]
             b2 = bm2[name]
@@ -106,7 +106,7 @@ def compare_benchmarks(
                 output_lines.append("\n</details>\n")
 
     if not found_diffs:
-        print(
+        print(  # noqa: T201
             f"No significant differences (over {threshold * 100:.1f}%) in cpu_time found."
         )
         return
@@ -115,18 +115,18 @@ def compare_benchmarks(
     markdown_output = "\n".join(header + output_lines)
 
     # Print to terminal
-    print(markdown_output)
+    print(markdown_output)  # noqa: T201
 
     # Save to file
     with open(output_file, "w") as f:
         f.write(markdown_output + "\n")
 
-    print(f"\n✅ Detailed Markdown saved to `{output_file}`")
+    print(f"\n✅ Detailed Markdown saved to `{output_file}`")  # noqa: T201
 
 
 if __name__ == "__main__":
     if len(sys.argv) != 3:
-        print(f"Usage: python {os.path.basename(__file__)} file1.json file2.json")
+        print(f"Usage: python {os.path.basename(__file__)} file1.json file2.json")  # noqa: T201
         sys.exit(1)
 
     file1 = sys.argv[1]
diff --git a/benchmarks/run_action.sh b/benchmarks/run_action.sh
index 07340addbc..c387541c49 100755
--- a/benchmarks/run_action.sh
+++ b/benchmarks/run_action.sh
@@ -12,38 +12,38 @@ action() {
     git checkout -b pr_branch
     git fetch --unshallow || echo "" # It might be worth switching actions/checkout to use depth 0 later on
     git config user.email "gha@example.com" && git config user.name "GHA" # For some reason this is needed even though nothing is being committed
-    git merge --no-commit --no-ff origin/${TARGET_BRANCH} || (echo "***\nError: There are merge conflicts that need to be resolved.\n***" && false)
+    git merge --no-commit --no-ff origin/"${TARGET_BRANCH}" || (echo "***\nError: There are merge conflicts that need to be resolved.\n***" && false)
 
     # create
-    mkdir -p $results_dir
+    mkdir -p "$results_dir"
 
     local bm_script="benchmark.py"
 
     python $bm_script \
         --benchmark_time_unit=ms \
-        --benchmark_out=${output_path_feature} \
+        --benchmark_out="${output_path_feature}" \
         --benchmark_out_format=json
 
 
     # This is for HEAD@main (usually main, not necessarily though)
     git stash
-    git checkout origin/${TARGET_BRANCH}
+    git checkout origin/"${TARGET_BRANCH}"
 
     local current_git_hash=$(git rev-parse --verify HEAD)
     local results_dir=${BASE_OUTPUT_DIR}/${TARGET_BRANCH}__${current_git_hash}
     local output_path_target=${results_dir}/${bm_script}.json
 
     # create
-    mkdir -p $results_dir
+    mkdir -p "$results_dir"
 
     local bm_script="benchmark.py"
 
     python $bm_script \
         --benchmark_time_unit=ms \
-        --benchmark_out=${output_path_target} \
+        --benchmark_out="${output_path_target}" \
         --benchmark_out_format=json
 
     # Compare both
-    python compare.py ${output_path_target} ${output_path_feature}
+    python compare.py "${output_path_target}" "${output_path_feature}"
 }
 action "$@"
diff --git a/benchmarks/run_local.sh b/benchmarks/run_local.sh
index f797094f15..cd4873b185 100755
--- a/benchmarks/run_local.sh
+++ b/benchmarks/run_local.sh
@@ -6,14 +6,14 @@ action() {
     local results_dir=results/${current_git_hash}
 
     # create
-    mkdir -p $results_dir
+    mkdir -p "$results_dir"
 
     local bm_script="benchmark.py"
 
     python $bm_script \
         --benchmark_time_unit=ms \
         --benchmark_color=true \
-        --benchmark_out=$results_dir/$bm_script.json \
+        --benchmark_out="$results_dir"/$bm_script.json \
         --benchmark_out_format=json
 }
 action "$@"
diff --git a/benchmarks/util.py b/benchmarks/util.py
index fb1cfb2f94..fbd85f7f65 100644
--- a/benchmarks/util.py
+++ b/benchmarks/util.py
@@ -4,7 +4,7 @@
 import math
 
 import google_benchmark
-import numpy as np
+import numpy as np  # noqa: TID251
 
 import awkward as ak
 

From 9e7e05ef2d917ba3dc2f71e2273bbd11a0878619 Mon Sep 17 00:00:00 2001
From: pfackeldey <fackeldey.peter@gmail.com>
Date: Mon, 16 Jun 2025 13:51:23 -0400
Subject: [PATCH 5/9] try this syntax

---
 .github/workflows/benchmark.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index d7e8f65c78..3ce3ace34c 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -109,7 +109,7 @@ jobs:
 
       - name: Comment on PR
         uses: actions/github-script@v7
-        if: ${{ hashFiles(format('{0}/comparison.md', ${{ steps.benchmark_and_compare.env.BASE_OUTPUT_DIR }})) != '' }} # if there's no comparison.md, we won't post anything
+        if: ${{ hashFiles(format('{0}/comparison.md', steps.benchmark_and_compare.env.BASE_OUTPUT_DIR)) != '' }} # if there's no comparison.md, we won't post anything
         with:
           script: |
             github.rest.issues.createComment({

From e8dff90584d207f3cc9010121ae3baffa5cd9847 Mon Sep 17 00:00:00 2001
From: pfackeldey <fackeldey.peter@gmail.com>
Date: Mon, 16 Jun 2025 14:52:26 -0400
Subject: [PATCH 6/9] satisfy shellcheck

---
 benchmarks/run_action.sh | 7 +++++--
 benchmarks/run_local.sh  | 3 ++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/benchmarks/run_action.sh b/benchmarks/run_action.sh
index c387541c49..807526b46a 100755
--- a/benchmarks/run_action.sh
+++ b/benchmarks/run_action.sh
@@ -4,7 +4,8 @@ action() {
     # This is for the HEAD@PR (including main merged)
 
     # setup output dir
-    local current_git_hash=$(git rev-parse --verify HEAD)
+    local current_git_hash
+    current_git_hash=$(git rev-parse --verify HEAD)
     local results_dir=${BASE_OUTPUT_DIR}/${BRANCH_NAME}__${current_git_hash}
     local output_path_feature=${results_dir}/${bm_script}.json
 
@@ -12,6 +13,7 @@ action() {
     git checkout -b pr_branch
     git fetch --unshallow || echo "" # It might be worth switching actions/checkout to use depth 0 later on
     git config user.email "gha@example.com" && git config user.name "GHA" # For some reason this is needed even though nothing is being committed
+    # shellcheck disable=SC2028
     git merge --no-commit --no-ff origin/"${TARGET_BRANCH}" || (echo "***\nError: There are merge conflicts that need to be resolved.\n***" && false)
 
     # create
@@ -29,7 +31,8 @@ action() {
     git stash
     git checkout origin/"${TARGET_BRANCH}"
 
-    local current_git_hash=$(git rev-parse --verify HEAD)
+    local current_git_hash
+    current_git_hash=$(git rev-parse --verify HEAD)
     local results_dir=${BASE_OUTPUT_DIR}/${TARGET_BRANCH}__${current_git_hash}
     local output_path_target=${results_dir}/${bm_script}.json
 
diff --git a/benchmarks/run_local.sh b/benchmarks/run_local.sh
index cd4873b185..19a3370169 100755
--- a/benchmarks/run_local.sh
+++ b/benchmarks/run_local.sh
@@ -2,7 +2,8 @@
 
 action() {
     # setup output dir
-    local current_git_hash=$(git rev-parse --verify HEAD)
+    local current_git_hash
+    current_git_hash=$(git rev-parse --verify HEAD)
     local results_dir=results/${current_git_hash}
 
     # create

From 1f8eae088ec5387828a7844dd5225a1c91c34087 Mon Sep 17 00:00:00 2001
From: pfackeldey <fackeldey.peter@gmail.com>
Date: Mon, 16 Jun 2025 14:55:53 -0400
Subject: [PATCH 7/9] try fix getting target branch name

---
 .github/workflows/benchmark.yml | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 3ce3ace34c..1f8daaddc2 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -81,28 +81,15 @@ jobs:
       - name: Print versions
         run: python -m pip list
 
-      - name: Get PR target branch
-        id: get_target_branch
-        uses: actions/github-script@v7
-        with:
-          result-encoding: string
-          script: |
-            const { data: pullRequest } = await github.rest.pulls.get({
-              owner: context.repo.owner,
-              repo: "awkward",
-              pull_number: "${{ inputs.pr-number }}",
-            });
-            return pullRequest.base.ref;
-
       - name: Run Benchmark and Comparisons
         id: benchmark_and_compare
         shell: bash
         run: |
           cd benchmarks/
           ./run_action.sh
-          echo "comparison='$(cat BASE_OUTPUT_DIR)/comparison.md'" >> $GITHUB_OUTPUT
+          echo "comparison='$(cat $BASE_OUTPUT_DIR)/comparison.md'" >> $GITHUB_OUTPUT
         env:
-          TARGET_BRANCH: ${{ steps.get_target_branch.outputs.result }} # usually: main
+          TARGET_BRANCH: ${{ github.event.pull_request.base.ref }} # usually: main
           BRANCH_NAME: ${{ github.head_ref || github.ref_name }} # feature branch
           BASE_OUTPUT_DIR: results_PR${{ inputs.pr-number }}
           continue-on-error: true # failed benchmarking shouldn't stop the rest of the steps

From 3e2bf2d5949cba98378a7056f6800942b5210789 Mon Sep 17 00:00:00 2001
From: Peter Fackeldey <fackeldey.peter@gmail.com>
Date: Tue, 17 Jun 2025 11:40:00 -0400
Subject: [PATCH 8/9] fix: debug benchmarks (#3550)

* debug test 1

* fix output piping for benchmark comparisons

* try handling multiline outputs

* hopefully fix paths

* another try to pass multiline output

* try fix json file names

* fix directory creation for benchmark results

* please precommit

* prettify table headers

* style: pre-commit fixes

* go back to original commit for comparison

* prettify branch name and SHA in table header

* style: pre-commit fixes

* try self-hosted runner

* another try with self-hosted runner

* another try

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .github/workflows/benchmark.yml | 69 ++++++++++++++++++++-------------
 benchmarks/compare.py           | 18 +++++++--
 benchmarks/misc_benchmark.py    |  1 -
 benchmarks/run_action.sh        | 30 +++++++-------
 4 files changed, 71 insertions(+), 47 deletions(-)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 1f8daaddc2..0051faffc6 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -20,44 +20,48 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  run-tests:
+  run-benchmarks:
     name: Run Benchmarks
-    strategy:
-      fail-fast: false
-      matrix:
-        runs-on:
-          - ubuntu-latest
-        python-version:
-          - "3.13"
-        python-architecture:
-          - x64
-
-    runs-on: ${{ matrix.runs-on }}
+
+    runs-on: self-hosted
 
     env:
       PIP_ONLY_BINARY: numpy
 
+    # Required for miniconda to activate conda
+    defaults:
+      run:
+        shell: bash -l {0}
+
     steps:
+      - name: Clean the workspace and mamba
+        run: |
+          rm -rf * .[!.]* || echo "Nothing to clean"
+          rm -rf ~/micromamba* || echo "Nothing to clean"
+
       - uses: actions/checkout@v4
         with:
           submodules: true
 
-      - name: "Python ${{ matrix.python-version }}"
-        uses: actions/setup-python@v5
+      - name: Get micromamba
+        uses: mamba-org/setup-micromamba@v2
         with:
-          python-version: "${{ matrix.python-version }}"
-          architecture: "${{ matrix.python-architecture }}"
-          allow-prereleases: true
+          environment-name: test-env
+          init-shell: bash
+          create-args: >-
+            python=3.13
 
       - name: Generate build files
-        run: pipx run nox -s prepare -- --headers --signatures --tests
+        run: |
+          pip install pipx
+          pipx run nox -s prepare -- --headers --signatures --tests
 
       - name: Cache awkward-cpp wheel
         id: cache-awkward-cpp-wheel
         uses: actions/cache@v4
         with:
           path: awkward-cpp/dist
-          key: ${{ github.job }}-${{ matrix.runs-on }}-${{ matrix.python-version }}-${{ matrix.python-architecture }}-${{ hashFiles('awkward-cpp/**') }}
+          key: ${{ github.job }}-${{ hashFiles('awkward-cpp/**') }}
 
       - name: Build awkward-cpp wheel
         if: steps.cache-awkward-cpp-wheel.outputs.cache-hit != 'true'
@@ -72,6 +76,10 @@ jobs:
           files: |
             awkward-cpp/dist/*.whl
 
+      - name: Add workaround for 3.13 + cramjam
+        run: echo 'PYO3_USE_ABI3_FORWARD_COMPATIBILITY=1' >> $GITHUB_ENV
+        shell: bash
+
       - name: Install awkward, awkward-cpp, and dependencies
         run: python -m pip install -v . ${{ steps.find-wheel.outputs.paths }} pytest-github-actions-annotate-failures
 
@@ -83,28 +91,37 @@ jobs:
 
       - name: Run Benchmark and Comparisons
         id: benchmark_and_compare
-        shell: bash
         run: |
           cd benchmarks/
           ./run_action.sh
-          echo "comparison='$(cat $BASE_OUTPUT_DIR)/comparison.md'" >> $GITHUB_OUTPUT
+          COMPARISON=$(cat "${BASE_OUTPUT_DIR}/comparison.md")
+          {
+            echo "comparison<<EOF"  # heredoc-style delimiter: lets the step output span multiple lines
+            echo "${COMPARISON}"
+            echo "EOF"
+          } >> "$GITHUB_OUTPUT"
+          cd ..
         env:
           TARGET_BRANCH: ${{ github.event.pull_request.base.ref }} # usually: main
           BRANCH_NAME: ${{ github.head_ref || github.ref_name }} # feature branch
-          BASE_OUTPUT_DIR: results_PR${{ inputs.pr-number }}
-          continue-on-error: true # failed benchmarking shouldn't stop the rest of the steps
+          BASE_OUTPUT_DIR: results
+        continue-on-error: true # failed benchmarking shouldn't stop the rest of the steps
 
       - name: Comment on PR
         uses: actions/github-script@v7
-        if: ${{ hashFiles(format('{0}/comparison.md', steps.benchmark_and_compare.env.BASE_OUTPUT_DIR)) != '' }} # if there's no comparison.md, we won't post anything
+        if: ${{ hashFiles('benchmarks/results/comparison.md') != '' }} # if there's no comparison.md, we won't post anything
         with:
           script: |
             github.rest.issues.createComment({
               issue_number: context.issue.number,
               owner: context.repo.owner,
               repo: context.repo.repo,
-              body: `${{ steps.benchmark_and_compare.outputs.comparison }}`
+              body: process.env.COMPARISON
             })
+        env:
+          COMPARISON: ${{ steps.benchmark_and_compare.outputs.comparison }}
 
       - name: Cleanup Benchmark Outputs
-        run: rm -r ${{ steps.benchmark_and_compare.env.BASE_OUTPUT_DIR }}
+        run: rm -rf "benchmarks/${BASE_OUTPUT_DIR:?}"
+        env:
+          BASE_OUTPUT_DIR: results
diff --git a/benchmarks/compare.py b/benchmarks/compare.py
index 0998bf042a..66c29240ab 100644
--- a/benchmarks/compare.py
+++ b/benchmarks/compare.py
@@ -55,9 +55,6 @@ def compare_benchmarks(
     bm1 = {b["name"]: b for b in data1["benchmarks"]}
     bm2 = {b["name"]: b for b in data2["benchmarks"]}
 
-    file1_short = os.path.basename(file1_path)
-    file2_short = os.path.basename(file2_path)
-
     output_lines = []
 
     found_diffs = False
@@ -86,7 +83,20 @@ def compare_benchmarks(
                     "<details><summary>Show full comparison</summary>\n\n"
                 )
 
-                headers = ["Metric", f"{file1_short}", f"{file2_short}"]
+                def _parse_branch_sha(file_path):
+                    assert file_path.endswith(".json")
+                    file_path = file_path.removesuffix(".json")  # remove file ending
+                    file_path = file_path.replace(
+                        os.getenv("BASE_OUTPUT_DIR", "results") + "/", "", 1
+                    )  # remove base dir
+                    branch, sha = file_path.rsplit("__", 1)  # get branch & SHA
+                    return f"branch: `{branch}` (sha: {sha})"
+
+                headers = [
+                    "Metric",
+                    _parse_branch_sha(file1_path),
+                    _parse_branch_sha(file2_path),
+                ]
                 time_unit = b1.get("time_unit", "")
                 assert time_unit == b2.get("time_unit", ""), (
                     "Can't compare difference units"
diff --git a/benchmarks/misc_benchmark.py b/benchmarks/misc_benchmark.py
index 07c75d54c4..0419ec9126 100644
--- a/benchmarks/misc_benchmark.py
+++ b/benchmarks/misc_benchmark.py
@@ -41,7 +41,6 @@ def _general_fun_benchmark(state, **kwargs):
 
 
 # extend for more misc funs
-# some of them don't make much sense to benchmark I suppose...
 FUNS = [
     ak.angle,
     ak.drop_none,
diff --git a/benchmarks/run_action.sh b/benchmarks/run_action.sh
index 807526b46a..7131f2e8ed 100755
--- a/benchmarks/run_action.sh
+++ b/benchmarks/run_action.sh
@@ -4,10 +4,9 @@ action() {
     # This is for the HEAD@PR (including main merged)
 
     # setup output dir
-    local current_git_hash
-    current_git_hash=$(git rev-parse --verify HEAD)
-    local results_dir=${BASE_OUTPUT_DIR}/${BRANCH_NAME}__${current_git_hash}
-    local output_path_feature=${results_dir}/${bm_script}.json
+    local orig_git_hash
+    orig_git_hash=$(git rev-parse --verify HEAD)
+    local output_path_feature=${BASE_OUTPUT_DIR}/${BRANCH_NAME}__${orig_git_hash}.json
 
     # Temporarily merge the target branch
     git checkout -b pr_branch
@@ -17,11 +16,9 @@ action() {
     git merge --no-commit --no-ff origin/"${TARGET_BRANCH}" || (echo "***\nError: There are merge conflicts that need to be resolved.\n***" && false)
 
     # create
-    mkdir -p "$results_dir"
+    mkdir -p "$(dirname "${output_path_feature}")"
 
-    local bm_script="benchmark.py"
-
-    python $bm_script \
+    python benchmark.py \
         --benchmark_time_unit=ms \
         --benchmark_out="${output_path_feature}" \
         --benchmark_out_format=json
@@ -31,22 +28,23 @@ action() {
     git stash
     git checkout origin/"${TARGET_BRANCH}"
 
-    local current_git_hash
-    current_git_hash=$(git rev-parse --verify HEAD)
-    local results_dir=${BASE_OUTPUT_DIR}/${TARGET_BRANCH}__${current_git_hash}
-    local output_path_target=${results_dir}/${bm_script}.json
+    local target_git_hash
+    target_git_hash=$(git rev-parse --verify HEAD)
+    local output_path_target=${BASE_OUTPUT_DIR}/${TARGET_BRANCH}__${target_git_hash}.json
 
     # create
-    mkdir -p "$results_dir"
-
-    local bm_script="benchmark.py"
+    mkdir -p "$(dirname "${output_path_target}")"
 
-    python $bm_script \
+    python benchmark.py \
         --benchmark_time_unit=ms \
         --benchmark_out="${output_path_target}" \
         --benchmark_out_format=json
 
     # Compare both
+    # first: switch back to original commit
+    git stash
+    git checkout "${orig_git_hash}"
+
     python compare.py "${output_path_target}" "${output_path_feature}"
 }
 action "$@"

From 6d7517dcc0e0ef8efba2b75cae4a2fbab460fd3d Mon Sep 17 00:00:00 2001
From: pfackeldey <fackeldey.peter@gmail.com>
Date: Tue, 17 Jun 2025 11:48:39 -0400
Subject: [PATCH 9/9] prettify python side

---
 benchmarks/compare.py           | 34 ++++-----------------------------
 benchmarks/misc_benchmark.py    | 17 ++++++++++++++---
 benchmarks/reducer_benchmark.py | 14 +++++++++++---
 benchmarks/util.py              | 21 ++++++++++++++++++++
 4 files changed, 50 insertions(+), 36 deletions(-)

diff --git a/benchmarks/compare.py b/benchmarks/compare.py
index 66c29240ab..22034f6e69 100644
--- a/benchmarks/compare.py
+++ b/benchmarks/compare.py
@@ -15,34 +15,6 @@ def relative_difference(val1, val2):
     return abs(val1 - val2) / min(val1, val2)
 
 
-def format_benchmark_name(name: str) -> str:
-    try:
-        parts = name.split("/")
-        base = parts[0]
-        params = dict(part.split("=", 1) for part in parts[1:])
-
-        array = params.pop("array", "??")
-        length = params.pop("length", "??")
-        dtype = params.pop("dtype", "").replace("'", "")
-        dtype_short = {
-            "float64": "f64",
-            "float32": "f32",
-            "int64": "i64",
-            "int32": "i32",
-        }.get(dtype, dtype)
-
-        pretty_name = f"{base}({array}<{length},{dtype_short}>"
-
-        # any extra parameters to the function, e.g. `axis=0`
-        for k, v in params.items():
-            pretty_name += f", {k}={v}"
-
-        pretty_name += ")"
-        return pretty_name
-    except Exception:
-        return name  # fallback
-
-
 def compare_benchmarks(
     file1_path,
     file2_path,
@@ -70,7 +42,7 @@ def compare_benchmarks(
 
             if rel_diff > threshold:
                 found_diffs = True
-                display_name = format_benchmark_name(name)
+                display_name = name
 
                 direction = "🟢 **Improvement**" if cpu2 < cpu1 else "🔴 **Regression**"
                 diff_line = f"**Relative CPU Time Difference:** `{rel_diff * 100:.1f}%` — {direction}"
@@ -146,4 +118,6 @@ def _parse_branch_sha(file_path):
         pathlib.Path(os.getenv("BASE_OUTPUT_DIR", "results"))
         / pathlib.Path("comparison.md")
     )
-    compare_benchmarks(file1, file2, output_file)
+    compare_benchmarks(
+        file1, file2, output_file, threshold=0.1
+    )  # increase threshold if it's too noisy
diff --git a/benchmarks/misc_benchmark.py b/benchmarks/misc_benchmark.py
index 0419ec9126..89b881ae93 100644
--- a/benchmarks/misc_benchmark.py
+++ b/benchmarks/misc_benchmark.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import google_benchmark
-from util import Flat, Jagged, benchmark
+from util import Jagged, benchmark, format_benchmark_name
 
 import awkward as ak
 
@@ -9,13 +9,20 @@
 def _prepare_fun_benchmark(fun):
     return [
         {
-            "name": f"ak.{fun.__name__}/array={mkarr.__name__}/{length=}/{dtype=}",
+            "name": format_benchmark_name(
+                {
+                    "op_name": fun.__name__,
+                    "array": mkarr.__name__,
+                    "length": length,
+                    "dtype": dtype,
+                }
+            ),
             "mkarr": mkarr,
             "length": length,
             "dtype": dtype,
             "fun": fun,
         }
-        for mkarr in (Jagged, Flat)
+        for mkarr in [Jagged]
         for length in [1 << i for i in (12, 16, 20)]
         for dtype in ["float64"]
     ]
@@ -30,6 +37,10 @@ def _general_fun_benchmark(state, **kwargs):
     # create singely jagged awkward array
     ak_array = mkarr(length, dtype)
 
+    # for ak.imag/real we need to add imaginary component
+    if fun in (ak.imag, ak.real):
+        ak_array = ak_array + 1j * ak_array
+
     # run measurement
     while state:
         fun(ak_array)
diff --git a/benchmarks/reducer_benchmark.py b/benchmarks/reducer_benchmark.py
index 86b20dbc9e..779113e4b7 100644
--- a/benchmarks/reducer_benchmark.py
+++ b/benchmarks/reducer_benchmark.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import google_benchmark
-from util import Flat, Jagged, benchmark
+from util import Jagged, benchmark, format_benchmark_name
 
 import awkward as ak
 
@@ -9,14 +9,22 @@
 def _prepare_reducer_benchmark(reducer):
     return [
         {
-            "name": f"ak.{reducer.__name__}/array={mkarr.__name__}/{length=}/{dtype=}/{axis=}",
+            "name": format_benchmark_name(
+                {
+                    "op_name": reducer.__name__,
+                    "array": mkarr.__name__,
+                    "length": length,
+                    "dtype": dtype,
+                    "axis": axis,
+                }
+            ),
             "mkarr": mkarr,
             "length": length,
             "dtype": dtype,
             "reducer": reducer,
             "axis": axis,
         }
-        for mkarr in (Jagged, Flat)
+        for mkarr in [Jagged]
         for length in [1 << i for i in (12, 16, 20)]
         for dtype in ["float64"]
         for axis in ([None, 0, 1] if mkarr is Jagged else [None])
diff --git a/benchmarks/util.py b/benchmarks/util.py
index fbd85f7f65..b0ee6a096d 100644
--- a/benchmarks/util.py
+++ b/benchmarks/util.py
@@ -55,3 +55,24 @@ def Jagged(length, dtype):
 def Flat(length, dtype):
     """creates a flat array"""
     return ak.Array(rng().random(length, dtype))
+
+
+def format_benchmark_name(params: dict) -> str:
+    """Build a display name such as ``ak.sum(Jagged<f64[4096]>, axis=1)``.
+
+    Missing ``op_name``/``array``/``length``/``dtype`` entries render as ``??``;
+    any other key is appended as ``, key=value``. ``params`` is not modified.
+    """
+    extras = dict(params)  # copy so ``pop`` never mutates the caller's dict
+    base = "ak." + extras.pop("op_name", "??")
+    array = extras.pop("array", "??")
+    length = extras.pop("length", "??")
+    # shorten the dtype name, e.g. "float64" -> "f64", "complex128" -> "c128"
+    dtype_short = (
+        extras.pop("dtype", "??")
+        .replace("float", "f")
+        .replace("int", "i")
+        .replace("complex", "c")
+    )
+    suffix = "".join(f", {k}={v}" for k, v in extras.items())
+    return f"{base}({array}<{dtype_short}[{length}]>{suffix})"