diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
new file mode 100644
index 0000000000..0051faffc6
--- /dev/null
+++ b/.github/workflows/benchmark.yml
@@ -0,0 +1,127 @@
+name: Benchmarks
+
+on:
+  pull_request:
+    # Changes confined to these paths do not trigger a benchmark run.
+    paths-ignore:
+      - README.md
+      - CONTRIBUTING.md
+      - CITATION.cff
+      - LICENSE
+      - .readthedocs.yml
+      - docs-img/**
+      - docs/**
+      - awkward-cpp/docs/**
+      - studies/**
+
+  workflow_dispatch:
+
+# Declare the token permissions explicitly instead of relying on repository defaults:
+# read access for the checkout, write access to post the benchmark comment on the PR.
+permissions:
+  contents: read
+  pull-requests: write
+
+concurrency:
+  # `github.head_ref` is only set for pull_request events, so runs of the same PR branch
+  # cancel each other; other triggers fall back to the unique run id.
+  group: "benchmark-${{ github.head_ref || github.run_id }}"
+  cancel-in-progress: true
+
+jobs:
+  run-benchmarks:
+    name: Run Benchmarks
+
+    # NOTE(review): a self-hosted runner executes pull-request code on project-owned
+    # hardware — confirm that runs for PRs from forks are gated for this repository.
+    runs-on: self-hosted
+
+    env:
+      # pip reads this as its --only-binary option: numpy is installed from wheels only.
+      PIP_ONLY_BINARY: numpy
+
+    # Required for miniconda to activate conda
+    # (login shell; presumably this is what sources the shell hook that the
+    # "Get micromamba" step installs via `init-shell: bash` — TODO confirm)
+    defaults:
+      run:
+        shell: bash -l {0}
+
+    steps:
+      # Start from an empty workspace: delete all files (dot-files included) and any
+      # ~/micromamba* directory. The `|| echo` fallbacks keep the step green if `rm` fails.
+      - name: Clean the workspace and mamba
+        run: |
+          rm -rf * .[!.]* || echo "Nothing to clean"
+          rm -rf ~/micromamba* || echo "Nothing to clean"
+
+      # Check out the repository together with its git submodules.
+      - uses: actions/checkout@v4
+        with:
+          submodules: true
+
+      # Create the `test-env` environment with Python 3.13 and set up bash initialisation.
+      - name: Get micromamba
+        uses: mamba-org/setup-micromamba@v2
+        with:
+          environment-name: test-env
+          init-shell: bash
+          create-args: >-
+            python=3.13
+
+      # Run the `prepare` nox session (headers, signatures, tests) through pipx.
+      - name: Generate build files
+        run: |
+          pip install pipx
+          pipx run nox -s prepare -- --headers --signatures --tests
+
+      # Reuse awkward-cpp/dist from a previous run while nothing under awkward-cpp/ changed
+      # (cache key = job id + hash of awkward-cpp/**).
+      - name: Cache awkward-cpp wheel
+        id: cache-awkward-cpp-wheel
+        uses: actions/cache@v4
+        with:
+          path: awkward-cpp/dist
+          key: ${{ github.job }}-${{ hashFiles('awkward-cpp/**') }}
+
+      # Only build the wheel when the cache step above did not restore one.
+      - name: Build awkward-cpp wheel
+        if: steps.cache-awkward-cpp-wheel.outputs.cache-hit != 'true'
+        run: |
+          python -m pip install build
+          python -m build -w awkward-cpp
+
+      # Resolve the wheel glob to concrete path(s); the install step reads `outputs.paths`.
+      - name: Find built wheel
+        uses: tj-actions/glob@v22
+        id: find-wheel
+        with:
+          files: |
+            awkward-cpp/dist/*.whl
+
+      # Written to $GITHUB_ENV so the variable is set for all following steps.
+      - name: Add workaround for 3.13 + cramjam
+        run: echo 'PYO3_USE_ABI3_FORWARD_COMPATIBILITY=1' >> $GITHUB_ENV
+        shell: bash
+
+      # Editable install (-e): benchmarks/run_action.sh switches the working tree between
+      # the merged PR state and the target branch, and `import awkward` has to follow those
+      # checkouts. A regular install copies the package into site-packages once, so both
+      # benchmark runs would measure exactly the same library code.
+      # NOTE(review): the compiled awkward-cpp wheel is still built once from the PR checkout,
+      # so differences in compiled kernels between the branches are not captured.
+      - name: Install awkward, awkward-cpp, and dependencies
+        run: python -m pip install -v -e . ${{ steps.find-wheel.outputs.paths }} pytest-github-actions-annotate-failures
+
+      # Benchmark-only requirements (google-benchmark).
+      - name: Setup Benchmark Env
+        run: python -m pip install -r benchmarks/requirements-benchmark.txt
+
+      # Record the installed package versions in the job log.
+      - name: Print versions
+        run: python -m pip list
+
+      # Benchmarks the PR and the target branch, then exposes benchmarks/<BASE_OUTPUT_DIR>/comparison.md
+      # as the step output `comparison` (only when that file was produced).
+      - name: Run Benchmark and Comparisons
+        id: benchmark_and_compare
+        run: |
+          # The job-wide custom shell (`bash -l {0}`) does not enable errexit, so opt in here;
+          # otherwise a failing run_action.sh would be silently ignored.
+          set -eo pipefail
+          cd benchmarks/
+          ./run_action.sh
+          # compare.py only writes comparison.md when it found significant differences.
+          comparison_file="${BASE_OUTPUT_DIR}/comparison.md"
+          if [[ -f "${comparison_file}" ]]; then
+            COMPARISON=$(cat "${comparison_file}")
+            {
+              echo "comparison<<EOF"
+              echo "${COMPARISON}"
+              echo "EOF"
+            } >> "${GITHUB_OUTPUT}"
+          fi
+        env:
+          TARGET_BRANCH: ${{ github.event.pull_request.base.ref }} # usually: main
+          BRANCH_NAME: ${{ github.head_ref || github.ref_name }} # feature branch
+          BASE_OUTPUT_DIR: results
+        continue-on-error: true # failed benchmarking shouldn't stop the rest of the steps
+
+      # Posts the comparison as a PR comment.
+      # - Only pull_request runs have a PR to comment on: for workflow_dispatch there is no
+      #   issue number in the event context.
+      # - If there's no comparison.md, we won't post anything. The hard-coded `results` path
+      #   has to match BASE_OUTPUT_DIR of the benchmark step.
+      - name: Comment on PR
+        uses: actions/github-script@v7
+        if: ${{ github.event_name == 'pull_request' && hashFiles('benchmarks/results/comparison.md') != '' }}
+        with:
+          script: |
+            // Await the request so the step finishes only after the comment was posted
+            // and an API error is reported as a failure of this step.
+            await github.rest.issues.createComment({
+              issue_number: context.issue.number,
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              body: process.env.COMPARISON
+            })
+        env:
+          COMPARISON: ${{ steps.benchmark_and_compare.outputs.comparison }}
+
+      # Always clean up, even when an earlier step failed. `-f` tolerates a results directory
+      # that was never created, and `:?` refuses to run with an empty BASE_OUTPUT_DIR.
+      - name: Cleanup Benchmark Outputs
+        if: ${{ always() }}
+        run: rm -rf "benchmarks/${BASE_OUTPUT_DIR:?}"
+        env:
+          BASE_OUTPUT_DIR: results
diff --git a/benchmarks/README.md b/benchmarks/README.md
new file mode 100644
index 0000000000..d08d1698cc
--- /dev/null
+++ b/benchmarks/README.md
@@ -0,0 +1,16 @@
+# Benchmarks
+
+Set up the environment:
+```shell
+pip install -r requirements-benchmark.txt
+```
+
+Run with:
+```shell
+./run_local.sh
+```
+
+To compare two benchmark result files:
+```shell
+python compare.py file1.json file2.json
+```
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py
new file mode 100644
index 0000000000..943e37b5bf
--- /dev/null
+++ b/benchmarks/benchmark.py
@@ -0,0 +1,10 @@
+# Entry point that runs all registered benchmarks in one google_benchmark session.
+from __future__ import annotations
+
+import google_benchmark
+
+# explicit imports to register all benchmarks:
+# each module registers its benchmarks in a module-level loop, i.e. as an import side effect
+import misc_benchmark  # noqa: F401
+import reducer_benchmark  # noqa: F401
+
+if __name__ == "__main__":
+    # run_action.sh / run_local.sh start this script with --benchmark_* command-line flags
+    google_benchmark.main()
diff --git a/benchmarks/compare.py b/benchmarks/compare.py
new file mode 100644
index 0000000000..22034f6e69
--- /dev/null
+++ b/benchmarks/compare.py
@@ -0,0 +1,123 @@
+from __future__ import annotations
+
+import json
+import os
+import pathlib
+import sys
+
+
+def load_json(filepath):
+    """Read the file at ``filepath`` and return its parsed JSON content."""
+    with open(filepath) as stream:
+        raw_text = stream.read()
+    return json.loads(raw_text)
+
+
+def relative_difference(val1, val2):
+    """Return ``|val1 - val2|`` relative to the smaller of the two values.
+
+    The result is symmetric in its arguments. When the smaller value is zero the ratio
+    is undefined: ``0.0`` is returned if both values are equal and ``inf`` otherwise,
+    instead of raising ``ZeroDivisionError``.
+    """
+    smaller = min(val1, val2)
+    if smaller == 0:
+        return 0.0 if val1 == val2 else float("inf")
+    return abs(val1 - val2) / smaller
+
+
+def _describe_result_file(file_path):
+    """Build the table column title for a result file.
+
+    Names of the form ``<BASE_OUTPUT_DIR>/<branch>__<sha>.json`` (the layout written by
+    ``run_action.sh``) become ``branch: `<branch>` (sha: <sha>)``. Any other name is shown
+    as ``file: `<name>``` instead of failing while splitting branch and SHA.
+    """
+    label = str(file_path)
+    if label.endswith(".json"):
+        label = label[: -len(".json")]  # remove file ending
+    label = label.replace(
+        os.getenv("BASE_OUTPUT_DIR", "results") + "/", "", 1
+    )  # remove base dir
+    branch, separator, sha = label.rpartition("__")  # get branch & SHA
+    if not separator:
+        return f"file: `{label}`"
+    return f"branch: `{branch}` (sha: {sha})"
+
+
+def compare_benchmarks(
+    file1_path,
+    file2_path,
+    output_path,
+    threshold=0.1,
+):
+    """Report benchmarks whose ``cpu_time`` differs between two result files.
+
+    Args:
+        file1_path: JSON result file used as the baseline (first table column).
+        file2_path: JSON result file compared against the baseline.
+        output_path: Markdown file to write; missing parent directories are created.
+        threshold: minimum relative ``cpu_time`` difference to report (0.1 means 10%).
+
+    Both files need a top-level ``"benchmarks"`` list whose entries carry ``name``,
+    ``cpu_time`` and ``real_time``. Only benchmarks present in both files are compared.
+    If no difference exceeds ``threshold`` a short message is printed and nothing is
+    written; otherwise the Markdown report is printed and saved to ``output_path``.
+
+    Raises:
+        AssertionError: if a reported benchmark uses different time units in the two files.
+    """
+    bm1 = {b["name"]: b for b in load_json(file1_path)["benchmarks"]}
+    bm2 = {b["name"]: b for b in load_json(file2_path)["benchmarks"]}
+
+    headers = [
+        "Metric",
+        _describe_result_file(file1_path),
+        _describe_result_file(file2_path),
+    ]
+    eps = "elements_per_second"  # potential extra info
+
+    output_lines = []
+
+    for name, b1 in bm1.items():
+        b2 = bm2.get(name)
+        if b2 is None:
+            continue  # not present in the second file: nothing to compare
+
+        cpu1 = b1["cpu_time"]
+        cpu2 = b2["cpu_time"]
+        rel_diff = relative_difference(cpu1, cpu2)
+        if not rel_diff > threshold:
+            continue
+
+        direction = "🟢 **Improvement**" if cpu2 < cpu1 else "🔴 **Regression**"
+        diff_line = (
+            f"**Relative CPU Time Difference:** `{rel_diff * 100:.1f}%` — {direction}"
+        )
+
+        output_lines.append(f"### 🔹 {name}\n")
+        output_lines.append(diff_line + "\n")
+
+        # Collapsed detailed comparison
+        output_lines.append("<details><summary>Show full comparison</summary>\n\n")
+
+        time_unit = b1.get("time_unit", "")
+        assert time_unit == b2.get("time_unit", ""), "Can't compare difference units"
+
+        table = [
+            "| " + " | ".join(headers) + " |",
+            "| --- | --- | --- |",
+            f"| `cpu_time` ({time_unit}) | {cpu1:.6e} | {cpu2:.6e} |",
+            f"| `real_time` ({time_unit}) | {b1['real_time']:.6e} | {b2['real_time']:.6e} |",
+        ]
+        if eps in b1 and eps in b2:
+            table.append(f"| `elements/s` (Hz) | {b1[eps]:.2e} | {b2[eps]:.2e} |")
+
+        output_lines.extend(table)
+        output_lines.append("\n</details>\n")
+
+    if not output_lines:
+        print(  # noqa: T201
+            f"No significant differences (over {threshold * 100:.1f}%) in cpu_time found."
+        )
+        return
+
+    markdown_output = "\n".join(["## Benchmarks", *output_lines])
+
+    # Print to terminal
+    print(markdown_output)  # noqa: T201
+
+    # Save to the path passed by the caller (not to a module-level global); the report
+    # contains emoji, so the encoding is fixed instead of depending on the locale.
+    destination = pathlib.Path(output_path)
+    destination.parent.mkdir(parents=True, exist_ok=True)
+    destination.write_text(markdown_output + "\n", encoding="utf-8")
+
+    print(f"\n✅ Detailed Markdown saved to `{output_path}`")  # noqa: T201
+
+
+if __name__ == "__main__":
+    # exactly two positional arguments are expected: the result files to compare
+    if len(sys.argv) != 3:
+        print(f"Usage: python {os.path.basename(__file__)} file1.json file2.json")  # noqa: T201
+        sys.exit(1)
+
+    file1, file2 = sys.argv[1:]
+
+    # the report is placed next to the results, in $BASE_OUTPUT_DIR (default: results)
+    output_file = str(
+        pathlib.Path(os.getenv("BASE_OUTPUT_DIR", "results")) / "comparison.md"
+    )
+
+    # increase threshold if it's too noisy
+    compare_benchmarks(file1, file2, output_file, threshold=0.1)
diff --git a/benchmarks/misc_benchmark.py b/benchmarks/misc_benchmark.py
new file mode 100644
index 0000000000..89b881ae93
--- /dev/null
+++ b/benchmarks/misc_benchmark.py
@@ -0,0 +1,77 @@
+from __future__ import annotations
+
+import google_benchmark
+from util import Jagged, benchmark, format_benchmark_name
+
+import awkward as ak
+
+
+def _prepare_fun_benchmark(fun):
+    """Return one test-case dict per (array maker, length, dtype) combination for ``fun``.
+
+    Each dict holds the display ``name`` of the benchmark plus the ``mkarr``, ``length``,
+    ``dtype`` and ``fun`` entries that ``_general_fun_benchmark`` reads from its kwargs.
+    """
+    lengths = [1 << exponent for exponent in (12, 16, 20)]
+
+    cases = []
+    for mkarr in [Jagged]:
+        for length in lengths:
+            for dtype in ["float64"]:
+                name = format_benchmark_name(
+                    {
+                        "op_name": fun.__name__,
+                        "array": mkarr.__name__,
+                        "length": length,
+                        "dtype": dtype,
+                    }
+                )
+                cases.append(
+                    {
+                        "name": name,
+                        "mkarr": mkarr,
+                        "length": length,
+                        "dtype": dtype,
+                        "fun": fun,
+                    }
+                )
+    return cases
+
+
+def _general_fun_benchmark(state, **kwargs):
+    """Benchmark body shared by all misc functions.
+
+    Expects the keyword arguments ``mkarr``, ``length``, ``dtype`` and ``fun``: builds the
+    input with ``mkarr(length, dtype)``, calls ``fun`` on it for as long as ``state`` is
+    truthy and finally records an ``elements_per_second`` rate counter.
+    """
+    mkarr, length, dtype, fun = (
+        kwargs[key] for key in ("mkarr", "length", "dtype", "fun")
+    )
+
+    # the singly jagged input is created once, before the measurement loop
+    ak_array = mkarr(length, dtype)
+
+    # ak.imag / ak.real get an input with an imaginary component
+    if fun in (ak.imag, ak.real):
+        ak_array = ak_array + 1j * ak_array
+
+    # measurement loop
+    while state:
+        fun(ak_array)
+
+    # rate counter: elements handled over all iterations
+    processed = length * state.iterations
+    state.counters["elements_per_second"] = google_benchmark.Counter(
+        processed, google_benchmark.Counter.kIsRate
+    )
+
+
+# functions that get benchmarked; extend for more misc funs
+FUNS = [
+    ak.angle,
+    ak.drop_none,
+    ak.imag,
+    ak.is_none,
+    ak.is_valid,
+    ak.nan_to_none,
+    ak.nan_to_num,
+    ak.real,
+    ak.round,
+    ak.validity_error,
+]
+
+# register as benchmarks
+# (the `benchmark` decorator registers one benchmark per test case returned by
+# `_prepare_fun_benchmark`; the name `_` is simply rebound on every pass of the loop)
+for fun in FUNS:
+
+    @benchmark(_prepare_fun_benchmark(fun=fun))
+    def _(state, **kwargs):
+        _general_fun_benchmark(state, **kwargs)
+
+
+# allows running only the benchmarks of this module
+if __name__ == "__main__":
+    google_benchmark.main()
diff --git a/benchmarks/reducer_benchmark.py b/benchmarks/reducer_benchmark.py
new file mode 100644
index 0000000000..779113e4b7
--- /dev/null
+++ b/benchmarks/reducer_benchmark.py
@@ -0,0 +1,78 @@
+from __future__ import annotations
+
+import google_benchmark
+from util import Jagged, benchmark, format_benchmark_name
+
+import awkward as ak
+
+
+def _prepare_reducer_benchmark(reducer):
+    """Return one test-case dict per (array maker, length, dtype, axis) combination.
+
+    Each dict holds the display ``name`` of the benchmark plus the ``mkarr``, ``length``,
+    ``dtype``, ``reducer`` and ``axis`` entries that ``_general_reducer_benchmark`` reads
+    from its kwargs.
+    """
+    lengths = [1 << exponent for exponent in (12, 16, 20)]
+
+    cases = []
+    for mkarr in [Jagged]:
+        # array makers other than Jagged are only reduced with axis=None
+        axes = [None, 0, 1] if mkarr is Jagged else [None]
+        for length in lengths:
+            for dtype in ["float64"]:
+                for axis in axes:
+                    name = format_benchmark_name(
+                        {
+                            "op_name": reducer.__name__,
+                            "array": mkarr.__name__,
+                            "length": length,
+                            "dtype": dtype,
+                            "axis": axis,
+                        }
+                    )
+                    cases.append(
+                        {
+                            "name": name,
+                            "mkarr": mkarr,
+                            "length": length,
+                            "dtype": dtype,
+                            "reducer": reducer,
+                            "axis": axis,
+                        }
+                    )
+    return cases
+
+
+def _general_reducer_benchmark(state, **kwargs):
+    """Benchmark body shared by all reducers.
+
+    Expects the keyword arguments ``mkarr``, ``length``, ``dtype``, ``reducer`` and
+    ``axis``: builds the input with ``mkarr(length, dtype)``, calls
+    ``reducer(array, axis=axis)`` for as long as ``state`` is truthy and finally records
+    an ``elements_per_second`` rate counter.
+    """
+    mkarr, length, dtype, reducer, axis = (
+        kwargs[key] for key in ("mkarr", "length", "dtype", "reducer", "axis")
+    )
+
+    # the singly jagged input is created once, before the measurement loop
+    ak_array = mkarr(length, dtype)
+
+    # measurement loop
+    while state:
+        reducer(ak_array, axis=axis)
+
+    # rate counter: elements handled over all iterations
+    processed = length * state.iterations
+    state.counters["elements_per_second"] = google_benchmark.Counter(
+        processed, google_benchmark.Counter.kIsRate
+    )
+
+
+# reducers that get benchmarked; extend for more reducers
+REDUCERS = [
+    ak.all,
+    ak.any,
+    ak.argmax,
+    ak.argmin,
+    ak.max,
+    ak.mean,
+    ak.min,
+    ak.prod,
+    ak.std,
+    ak.sum,
+    ak.var,
+]
+
+# register as benchmarks
+# (the `benchmark` decorator registers one benchmark per test case returned by
+# `_prepare_reducer_benchmark`; the name `_` is simply rebound on every pass of the loop)
+for reducer in REDUCERS:
+
+    @benchmark(_prepare_reducer_benchmark(reducer=reducer))
+    def _(state, **kwargs):
+        _general_reducer_benchmark(state, **kwargs)
+
+
+# allows running only the benchmarks of this module
+if __name__ == "__main__":
+    google_benchmark.main()
diff --git a/benchmarks/requirements-benchmark.txt b/benchmarks/requirements-benchmark.txt
new file mode 100644
index 0000000000..1c47cea3a0
--- /dev/null
+++ b/benchmarks/requirements-benchmark.txt
@@ -0,0 +1 @@
+google-benchmark
diff --git a/benchmarks/run_action.sh b/benchmarks/run_action.sh
new file mode 100755
index 0000000000..7131f2e8ed
--- /dev/null
+++ b/benchmarks/run_action.sh
@@ -0,0 +1,50 @@
+#!/usr/bin/env bash
+#
+# Benchmark the PR state (with the target branch merged in) and the target branch itself,
+# then compare both results with compare.py. Run from within the benchmarks/ directory.
+#
+# Required environment variables:
+#   BASE_OUTPUT_DIR  directory (relative to benchmarks/) that receives the JSON results
+#   BRANCH_NAME      name of the feature branch (used in the result file name)
+#   TARGET_BRANCH    branch to compare against; has to exist as origin/<TARGET_BRANCH>
+#
+# NOTE(review): `python benchmark.py` imports whatever `awkward` is installed in the
+# environment. Switching branches below only changes the measured library code if that
+# installation tracks the working tree (e.g. an editable install) — confirm.
+
+# Stop at the first failing command instead of benchmarking or comparing a broken state.
+set -euo pipefail
+
+run_benchmarks() {
+    # Run the complete benchmark suite and write the JSON report to "$1".
+    local output_path=$1
+
+    mkdir -p "$(dirname "${output_path}")"
+
+    python benchmark.py \
+        --benchmark_time_unit=ms \
+        --benchmark_out="${output_path}" \
+        --benchmark_out_format=json
+}
+
+action() {
+    # Fail early with a clear message; an empty TARGET_BRANCH would otherwise only show up
+    # as a confusing merge error further down.
+    : "${BASE_OUTPUT_DIR:?BASE_OUTPUT_DIR must be set}"
+    : "${BRANCH_NAME:?BRANCH_NAME must be set}"
+    : "${TARGET_BRANCH:?TARGET_BRANCH must be set}"
+
+    # This is for the HEAD@PR (including main merged)
+
+    # setup output dir
+    local orig_git_hash
+    orig_git_hash=$(git rev-parse --verify HEAD)
+    local output_path_feature=${BASE_OUTPUT_DIR}/${BRANCH_NAME}__${orig_git_hash}.json
+
+    # Temporarily merge the target branch (-B: reuse the helper branch if it already exists)
+    git checkout -B pr_branch
+    git fetch --unshallow || echo "" # It might be worth switching actions/checkout to use depth 0 later on
+    git config user.email "gha@example.com" && git config user.name "GHA" # For some reason this is needed even though nothing is being committed
+    if ! git merge --no-commit --no-ff origin/"${TARGET_BRANCH}"; then
+        printf '***\nError: There are merge conflicts that need to be resolved.\n***\n' >&2
+        return 1
+    fi
+
+    run_benchmarks "${output_path_feature}"
+
+    # This is for HEAD@main (usually main, not necessarily though)
+    git stash
+    git checkout origin/"${TARGET_BRANCH}"
+
+    local target_git_hash
+    target_git_hash=$(git rev-parse --verify HEAD)
+    local output_path_target=${BASE_OUTPUT_DIR}/${TARGET_BRANCH}__${target_git_hash}.json
+
+    run_benchmarks "${output_path_target}"
+
+    # Compare both
+    # first: switch back to original commit
+    git stash
+    git checkout "${orig_git_hash}"
+
+    python compare.py "${output_path_target}" "${output_path_feature}"
+}
+action "$@"
diff --git a/benchmarks/run_local.sh b/benchmarks/run_local.sh
new file mode 100755
index 0000000000..19a3370169
--- /dev/null
+++ b/benchmarks/run_local.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+
+action() {
+    # Runs the whole benchmark suite for the current checkout and stores the JSON report
+    # under results/<commit hash of HEAD>/.
+    local bm_script="benchmark.py"
+
+    local head_sha
+    head_sha=$(git rev-parse --verify HEAD)
+    local out_dir="results/${head_sha}"
+
+    # make sure the output directory exists
+    mkdir -p "${out_dir}"
+
+    python "${bm_script}" \
+        --benchmark_time_unit=ms \
+        --benchmark_color=true \
+        --benchmark_out="${out_dir}/${bm_script}.json" \
+        --benchmark_out_format=json
+}
+action "$@"
diff --git a/benchmarks/util.py b/benchmarks/util.py
new file mode 100644
index 0000000000..b0ee6a096d
--- /dev/null
+++ b/benchmarks/util.py
@@ -0,0 +1,78 @@
+from __future__ import annotations
+
+import functools
+import math
+
+import google_benchmark
+import numpy as np  # noqa: TID251
+
+import awkward as ak
+
+
+# reproducible rng
+def rng():
+    """Return a new NumPy random generator seeded with 42 (every call restarts the stream)."""
+    fixed_seed = 42
+    return np.random.default_rng(fixed_seed)
+
+
+def benchmark(*args):
+    """Decorator factory that registers ``func`` once per test case with google_benchmark.
+
+    ``args[0]`` is an iterable of test-case dicts. Each dict needs a ``"name"`` entry (the
+    registered benchmark name) and is forwarded to ``func`` as keyword arguments, i.e.
+    the registered callable runs ``func(state, **test_case)``.
+
+    The decorated name ends up bound to the wrapper of the last test case, or to ``func``
+    itself when there are no test cases (this used to raise ``UnboundLocalError``).
+    """
+
+    def decorator(func):
+        registered = func
+        for test_case in args[0]:
+            # `test_case=test_case` freezes the current dict for this wrapper; a plain
+            # closure would see the last test case in every wrapper.
+            @google_benchmark.register(name=test_case["name"])
+            @functools.wraps(func)
+            def wrapper(state, test_case=test_case):
+                return func(state, **test_case)
+
+            registered = wrapper
+
+        return registered
+
+    return decorator
+
+
+def _generate_counts(sum_upto: int, how_many: int) -> np.ndarray:
+    """Return ``how_many`` integer counts that add up to exactly ``sum_upto``.
+
+    The counts are one multinomial draw whose probabilities come from a Dirichlet
+    distribution with concentration 0.3 per entry; both draws use freshly seeded generators.
+    """
+    concentration = np.ones(how_many) * 0.3
+    probabilities = rng().dirichlet(concentration)
+    counts = rng().multinomial(sum_upto, probabilities).astype("int")
+    assert np.sum(counts) == sum_upto
+    return counts
+
+
+def Jagged(length, dtype):
+    """creates a singly jagged array
+
+    ``length`` random values of ``dtype`` are split into ``10 * 2**(floor(log2(length)) // 2)``
+    sublists whose sizes come from ``_generate_counts``. ``length`` has to be larger than
+    that number of sublists.
+    """
+    flat_content = rng().random(length, dtype)
+
+    # seems like a reasonable heuristic
+    # (math.log2 instead of log(x) / log(2): the quotient of two rounded logarithms
+    # is not guaranteed to be exact for powers of two)
+    powof2 = int(math.log2(length))
+    how_many = (1 << (powof2 // 2)) * 10
+
+    assert how_many < length, "length is too small for the number of sublists"
+
+    counts = _generate_counts(length, how_many)
+    return ak.unflatten(flat_content, counts)
+
+
+def Flat(length, dtype):
+    """creates a flat array of ``length`` random values of ``dtype``"""
+    values = rng().random(length, dtype)
+    return ak.Array(values)
+
+
+def format_benchmark_name(params: dict) -> str:
+    """Build a readable benchmark name such as ``ak.sum(Jagged<f64[4096]>, axis=0)``.
+
+    Recognised keys are ``op_name``, ``array``, ``length`` and ``dtype`` (each shown as
+    ``??`` when missing); every other entry is appended as ``key=value`` in insertion
+    order. ``dtype`` may be a string or any object whose ``str()`` is the dtype name.
+
+    The passed dict is not modified.
+    """
+    extras = dict(params)  # work on a copy so the caller's dict keeps all its keys
+
+    base = "ak." + extras.pop("op_name", "??")
+    array = extras.pop("array", "??")
+    length = extras.pop("length", "??")
+    dtype_short = (
+        str(extras.pop("dtype", "??"))
+        .replace("float", "f")
+        .replace("int", "i")
+        .replace("complex", "c")
+    )
+
+    arguments = [f"{array}<{dtype_short}[{length}]>"]
+
+    # any extra parameters to the function, e.g. `axis=0`
+    arguments.extend(f"{k}={v}" for k, v in extras.items())
+
+    return f"{base}({', '.join(arguments)})"