diff --git a/benches/bigdecimal/aggregate.py b/benches/bigdecimal/aggregate.py
index db0a4e9e..c1427788 100644
--- a/benches/bigdecimal/aggregate.py
+++ b/benches/bigdecimal/aggregate.py
@@ -328,7 +328,9 @@ def main() -> int:
     offset = now_local.strftime("%z")
     offset_str = f"UTC{offset[:3]}:{offset[3:]}" if len(offset) == 5 else "UTC"
     header_ts = f"{now_local.strftime('%Y-%m-%d %H:%M:%S')} ({offset_str})"
-    out_path = args.out or os.path.join(args.reports_dir, f"bigdecimal_report_{ts}.md")
+    out_path = args.out or os.path.join(
+        args.reports_dir, f"bigdecimal_report_utc_{ts}.md"
+    )
 
     log_index = discover_logs(args.logs_dir)
     records: dict[str, dict[int, dict[str, dict[str, dict[str, str]]]]] = {}
diff --git a/benches/bigint/.gitignore b/benches/bigint/.gitignore
new file mode 100644
index 00000000..6abc98fe
--- /dev/null
+++ b/benches/bigint/.gitignore
@@ -0,0 +1,11 @@
+# local
+logs/
+reports/
+cases.cache/
+
+# build artifacts
+mojo/bench
+
+# Rust build output
+rust/target/
+rust/Cargo.lock
\ No newline at end of file
diff --git a/benches/bigint/README.md b/benches/bigint/README.md
new file mode 100644
index 00000000..201a38d2
--- /dev/null
+++ b/benches/bigint/README.md
@@ -0,0 +1,95 @@
+# BigInt cross-language benchmarks
+
+This harness benchmarks `decimo.BigInt` against two arbitrary-precision
+integer implementations:
+
+| Lang     | Library              | Role                                                      |
+| -------- | -------------------- | --------------------------------------------------------- |
+| `mojo`   | `decimo.BigInt`      | System under test.                                        |
+| `python` | `int` (stdlib)       | Correctness oracle (drives `match` flag) + timing column. |
+| `rust`   | `num-bigint::BigInt` | Static-compiled peer (timing column).                     |
+
+BigInt arithmetic is exact, so — unlike the BigDecimal harness — there is
+**no precision parameter**. The report shows one timings table per op.
+
+## Layout
+
+```txt
+    cases/            # source-of-truth TOML test cases (one file per op)
+    mojo/   bench.mojo   +  bench   (release-built binary)
+    python/ bench.py
+    rust/   Cargo.toml  +  src/main.rs   (num-bigint harness)
+    aggregate.py      # logs/*.csv  ->  reports/bigint_report_utc_<ts>.md
+    run_all.sh        # build all available, run all ops, aggregate
+    logs/             # per-language CSV bench logs (generated)
+    reports/          # generated markdown reports
+```
+
+Log filenames embed the op so multiple runs can coexist:
+
+```txt
+    logs/<lang>_<op>_<YYYYMMDD>_<HHMMSS>.csv
+```
+
+## Quick start
+
+```sh
+./run_all.sh                          # all default ops
+./run_all.sh --ops multiply power     # subset of ops
+```
+
+Direct invocation of any single harness:
+
+```sh
+# Mojo
+cd mojo
+pixi run --manifest-path ../../../pixi.toml ./bench \
+    --op multiply --cases-dir ../cases --logs-dir ../logs
+
+# Python
+cd python
+pixi run --manifest-path ../../../pixi.toml python3 bench.py \
+    --op multiply --cases-dir ../cases --logs-dir ../logs
+
+# Rust
+cd rust
+cargo run --release --quiet -- \
+    --op multiply --cases-dir ../cases --logs-dir ../logs
+```
+
+## Ops
+
+`add`, `multiply`, `floor_divide` (`//`), `power`, `shift` (`<<`), `sqrt`
+(integer square root), `from_string`, `to_string`.
+
+For `power` and `shift` the `b` field encodes a small integer (the exponent
+or shift count); all other binary ops take `b` as the second operand.
+
+## Adding a new test case
+
+Edit the appropriate `cases/<op>.toml` file:
+
+```toml
+[config]
+iterations = 500           # auto-tuner cap; actual count targets ~50ms
+
+[[cases]]
+name = "Large integer multiply"
+a    = "{9,100}"           # {C,N} repeats C, N times → 100 nines
+b    = "{9,100}"
+```
+
+## Report
+
+After a header and toolchain summary, the markdown report has three sections:
+
+1. **Cross-op overview** — one row per op showing median ns/iter per
+   language plus `dm/py` and `dm/rs` ratios.
+2. **Per-op detail** — for each op, one timings table. Each row carries a
+   `match py` flag (`OK` / `DIFF` / `N/A`) comparing `decimo` against Python.
+   Every `DIFF` is expanded inline as a collapsible `<details>` block
+   listing every language's full result string.
+3. **Agreement summary** — `decimo`-vs-Python match rate per op.
+
+Ratios are `decimo ÷ peer`, so **a value below `1.00x` means `decimo` is
+faster** than that peer.
diff --git a/benches/bigint/aggregate.py b/benches/bigint/aggregate.py
new file mode 100644
index 00000000..9d4c3812
--- /dev/null
+++ b/benches/bigint/aggregate.py
@@ -0,0 +1,482 @@
+#!/usr/bin/env python3
+"""Aggregate per-language CSV bench logs into a side-by-side markdown report.
+
+Languages: mojo (decimo.BigInt, system under test), python (int, oracle),
+rust (num-bigint, compiled peer).
+
+BigInt is exact, so there is NO precision dimension. Log filenames look
+like `<lang>_<op>_<ts>.csv`; the report shows one timings table per op.
+
+The `match py` column is **OK** iff `decimo` and Python agree on the result
+value. Integers have a single canonical decimal form, so this is exact
+string equality (with a numeric fallback). DIFF cases are expanded inside
+collapsible `<details>` blocks listing every language's full result.
+"""
+
+from __future__ import annotations
+
+import argparse
+import csv
+import glob
+import os
+import platform
+import re
+import shutil
+import statistics
+import subprocess
+import sys
+from datetime import datetime, timezone
+
+# Result strings can run into tens of thousands of characters (e.g.
+# 50000-digit from_string cases). Raise the CSV field limit accordingly.
+csv.field_size_limit(10_000_000)
+
+
+LANGS_DEFAULT = ["mojo", "python", "rust"]
+LANG_LABEL = {"mojo": "decimo", "python": "python", "rust": "rust"}
+
+# Maximum width for case names rendered in markdown tables. Long names are
+# truncated with an ellipsis to keep tables readable; the full name is still
+# shown verbatim inside `DIFF` blocks.
+DISPLAY_NAME_MAX = 48
+
+
+def _short_name(name: str) -> str:
+    if len(name) <= DISPLAY_NAME_MAX:
+        return name
+    return name[: DISPLAY_NAME_MAX - 1] + "…"  # ellipsis
+
+
+def _values_equal(a: str, b: str) -> bool:
+    """Compare two integer result strings as values.
+
+    Unequal strings are re-compared via ``int()``; a side that does not
+    parse (e.g. an ``ERR: ...`` marker) makes the pair unequal.
+    """
+    if hasattr(sys, "set_int_max_str_digits"):  # 3.11+: default cap is 4300 digits
+        sys.set_int_max_str_digits(10_000_000)  # same bound as the CSV field limit
+    try:
+        return a == b or int(a) == int(b)
+    except (TypeError, ValueError):
+        return False
+
+
+LOG_RE = re.compile(r"^(?P<lang>[a-z]+)_(?P<op>[a-z_]+)_(?P<ts>\d{8}_\d{6})\.csv$")
+
+
+def discover_logs(logs_dir: str) -> dict[tuple[str, str], str]:
+    latest: dict[tuple[str, str], tuple[str, str]] = {}
+    for path in glob.glob(os.path.join(logs_dir, "*.csv")):
+        m = LOG_RE.match(os.path.basename(path))
+        if not m:
+            continue
+        key = (m.group("lang"), m.group("op"))
+        ts = m.group("ts")
+        if key not in latest or ts > latest[key][1]:
+            latest[key] = (path, ts)
+    return {k: v[0] for k, v in latest.items()}
+
+
+def load(path: str) -> list[dict[str, str]]:
+    with open(path, newline="") as f:
+        return list(csv.DictReader(f))
+
+
+def fmt_num(s) -> str:
+    if s is None or s == "":
+        return "-"
+    try:
+        return f"{float(s):,.2f}"
+    except (TypeError, ValueError):
+        return str(s)
+
+
+def fmt_ratio(num, den) -> str:
+    try:
+        n, d = float(num), float(den)
+        if d == 0:
+            return "-"
+        return f"{n / d:,.2f}x"
+    except (TypeError, ValueError):
+        return "-"
+
+
+def median_ns(rows: list[dict[str, str]]) -> float | None:
+    vals: list[float] = []  # timings that parse; rows without one are skipped
+    for r in rows:
+        try:
+            vals.append(float(r["ns_per_iter"]))
+        except (KeyError, TypeError, ValueError):  # TypeError: None from a short row
+            pass
+    return statistics.median(vals) if vals else None
+
+
+def _run(cmd: list[str]) -> str:
+    try:
+        out = subprocess.run(
+            cmd, capture_output=True, text=True, timeout=5, check=False
+        ).stdout.strip()
+        return out.splitlines()[0].strip() if out else ""
+    except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
+        return ""
+
+
+def collect_env_info() -> list[tuple[str, str]]:
+    info: list[tuple[str, str]] = [
+        ("OS", f"{platform.system()} {platform.release()}"),
+        ("Arch", platform.machine()),
+    ]
+    cpu = ""
+    if platform.system() == "Darwin":
+        cpu = _run(["sysctl", "-n", "machdep.cpu.brand_string"])
+    elif platform.system() == "Linux":
+        try:
+            with open("/proc/cpuinfo") as f:
+                for line in f:
+                    if line.startswith("model name"):
+                        cpu = line.split(":", 1)[1].strip()
+                        break
+        except OSError:
+            pass
+    if cpu:
+        info.append(("CPU", cpu))
+    if shutil.which("pixi"):
+        v = _run(["pixi", "run", "mojo", "--version"])
+        if v:
+            info.append(("Mojo", v))
+    info.append(("Python", platform.python_version()))
+    rustc = _run(["rustc", "--version"])
+    if rustc:
+        info.append(("Rust", rustc))
+    return info
+
+
+def _is_numeric_col(values: list[str]) -> bool:
+    seen = False
+    for v in values:
+        if v in ("", "-"):
+            continue
+        seen = True
+        s = v.rstrip("x").replace(",", "")
+        try:
+            float(s)
+        except ValueError:
+            return False
+    return seen
+
+
+# Threshold above which a DIFF result is considered "long" and worth
+# folding the shared head / tail. 200 chars is roughly two lines on a
+# typical viewer, so anything past that benefits from folding.
+_FOLD_LONG_THRESHOLD = 200
+_FOLD_MIN_RUN = 40
+_FOLD_KEEP_EDGE = 8
+
+
+def _fold_diff_results(results: list[str | None]) -> list[str | None]:
+    """Fold long DIFF result strings around the diverging region.
+
+    Given N result strings (some may be ``None`` for missing rows), find
+    the longest common prefix and longest common suffix shared by ALL
+    non-None results. If both are long enough, replace those runs with
+    ``(K same chars)`` markers, keeping a few boundary chars verbatim.
+    """
+    real = [r for r in results if r is not None]
+    if not real:
+        return results
+    if max(len(r) for r in real) < _FOLD_LONG_THRESHOLD:
+        return results
+    prefix_len = 0
+    min_len = min(len(r) for r in real)
+    while prefix_len < min_len and all(
+        r[prefix_len] == real[0][prefix_len] for r in real
+    ):
+        prefix_len += 1
+    suffix_len = 0
+    while suffix_len < min_len - prefix_len and all(
+        r[-1 - suffix_len] == real[0][-1 - suffix_len] for r in real
+    ):
+        suffix_len += 1
+
+    fold_prefix = prefix_len >= _FOLD_MIN_RUN
+    fold_suffix = suffix_len >= _FOLD_MIN_RUN
+    if not fold_prefix and not fold_suffix:
+        return results
+
+    out: list[str | None] = []
+    for r in results:
+        if r is None:
+            out.append(None)
+            continue
+        head_keep = _FOLD_KEEP_EDGE if fold_prefix else 0
+        tail_keep = _FOLD_KEEP_EDGE if fold_suffix else 0
+        head_keep = min(head_keep, prefix_len)
+        tail_keep = min(tail_keep, suffix_len)
+        head_fold = prefix_len - head_keep
+        tail_fold = suffix_len - tail_keep
+        middle_start = prefix_len
+        middle_end = len(r) - suffix_len
+        parts: list[str] = []
+        if fold_prefix:
+            parts.append(f"({head_fold} same chars)...")
+            parts.append(r[prefix_len - head_keep : prefix_len])
+        else:
+            parts.append(r[:prefix_len])
+        parts.append(r[middle_start:middle_end])
+        if fold_suffix:
+            parts.append(r[len(r) - suffix_len : len(r) - suffix_len + tail_keep])
+            parts.append(f"...({tail_fold} same chars)")
+        else:
+            parts.append(r[len(r) - suffix_len :])
+        out.append("".join(parts))
+    return out
+
+
+def render_aligned_table(header: list[str], rows: list[list[str]]) -> list[str]:
+    cols = len(header)
+    widths = [len(h) for h in header]
+    for r in rows:
+        for i in range(cols):
+            if i < len(r) and len(r[i]) > widths[i]:
+                widths[i] = len(r[i])
+    aligns = [
+        "right" if _is_numeric_col([r[i] for r in rows if i < len(r)]) else "left"
+        for i in range(cols)
+    ]
+
+    def pad(cell: str, w: int, align: str) -> str:
+        return cell.rjust(w) if align == "right" else cell.ljust(w)
+
+    out: list[str] = []
+    out.append(
+        "| "
+        + " | ".join(pad(header[i], widths[i], aligns[i]) for i in range(cols))
+        + " |"
+    )
+    sep = []
+    for i in range(cols):
+        sep.append(
+            "-" * (widths[i] - 1) + ":" if aligns[i] == "right" else "-" * widths[i]
+        )
+    out.append("| " + " | ".join(sep) + " |")
+    for r in rows:
+        cells = []
+        for i in range(cols):
+            v = r[i] if i < len(r) else ""
+            cells.append(pad(v, widths[i], aligns[i]))
+        out.append("| " + " | ".join(cells) + " |")
+    return out
+
+
+def main() -> int:
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--logs-dir", default="logs")
+    ap.add_argument("--reports-dir", default="reports")
+    ap.add_argument("--ops", nargs="+", required=True)
+    ap.add_argument("--langs", nargs="+", default=LANGS_DEFAULT)
+    ap.add_argument("--out", default=None)
+    args = ap.parse_args()
+
+    os.makedirs(args.reports_dir, exist_ok=True)
+    ts = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
+    now_local = datetime.now().astimezone()
+    offset = now_local.strftime("%z")
+    offset_str = f"UTC{offset[:3]}:{offset[3:]}" if len(offset) == 5 else "UTC"
+    header_ts = f"{now_local.strftime('%Y-%m-%d %H:%M:%S')} ({offset_str})"
+    out_path = args.out or os.path.join(args.reports_dir, f"bigint_report_utc_{ts}.md")
+
+    log_index = discover_logs(args.logs_dir)
+    # records[op][lang][case_name] -> row dict
+    records: dict[str, dict[str, dict[str, dict[str, str]]]] = {}
+    case_orders: dict[str, list[str]] = {}
+    for op in args.ops:
+        records[op] = {}
+        case_orders[op] = []
+        for lang in args.langs:
+            path = log_index.get((lang, op))
+            if not path:
+                continue
+            d: dict[str, dict[str, str]] = {}
+            for r in load(path):
+                if r["case_name"] not in case_orders[op]:
+                    case_orders[op].append(r["case_name"])
+                d[r["case_name"]] = r
+            records[op][lang] = d
+
+    ratio_pairs = [l for l in args.langs if l != "mojo"]
+    ratio_short = {"python": "py", "rust": "rs"}
+
+    lines: list[str] = [
+        "# BigInt cross-language benchmark report",
+        "",
+        f"- Generated: {header_ts}",
+        f"- Languages: {', '.join(LANG_LABEL.get(l, l) for l in args.langs)}",
+        f"- Ops: {', '.join(args.ops)}",
+        "- **Time unit: nanoseconds per iteration (ns/iter)** — lower is faster.",
+        "",
+        "All timing columns (`decimo`, `python`, `rust`) are **ns / iter**.",
+        "Each per-op timings table has a single correctness column,",
+        "`match py` (vs Python `int`), comparing integer values: `OK` when",
+        "`decimo` equals the Python oracle, `DIFF` when it disagrees, and",
+        "`N/A` when either the `decimo` or `python` row is missing (so",
+        "correctness was not checked). Only real `DIFF` cases are listed in",
+        "the DIFF block.",
+        "",
+        f"Ratio columns: `dm/{ratio_short.get('python')}` = decimo ÷ python, "
+        f"`dm/{ratio_short.get('rust')}` = decimo ÷ rust "
+        "(**< 1.00 means decimo is faster**).",
+        "",
+        "## 0. System & toolchain",
+        "",
+        "```txt",
+    ]
+    for k, v in collect_env_info():
+        lines.append(f"{(k + ':').ljust(16)}{v}")
+    lines.append("```")
+    lines.append("")
+
+    # ----- Section 1: cross-op overview -----
+    lines.append("## 1. Cross-op overview")
+    lines.append("")
+    overview_header = ["op", "cases"]
+    for lang in args.langs:
+        overview_header.append(LANG_LABEL.get(lang, lang))
+    for lang in ratio_pairs:
+        overview_header.append(f"dm/{ratio_short.get(lang, lang)}")
+    overview_rows: list[list[str]] = []
+    for op in args.ops:
+        row = [op, str(len(case_orders[op]))]
+        meds: dict[str, float | None] = {}
+        for lang in args.langs:
+            rows = list(records[op].get(lang, {}).values())
+            m = median_ns(rows)
+            meds[lang] = m
+            row.append(fmt_num(m))
+        for lang in ratio_pairs:
+            row.append(fmt_ratio(meds.get("mojo"), meds.get(lang)))
+        overview_rows.append(row)
+    lines.extend(render_aligned_table(overview_header, overview_rows))
+    lines.append("")
+
+    # ----- Section 2: per-op detail -----
+    lines.append("## 2. Per-op detail")
+    lines.append("")
+    for op in args.ops:
+        lines.append(f"### {op}")
+        lines.append("")
+        per_lang = records[op]
+        if not per_lang:
+            lines.append("_no logs found_\n")
+            continue
+        present_langs = [lang for lang in args.langs if lang in per_lang]
+        present_ratio_pairs = [
+            lang for lang in ratio_pairs if lang in per_lang and "mojo" in per_lang
+        ]
+
+        # `py_match` is tri-state: True (OK), False (DIFF), or None
+        # (N/A — not checked because a `mojo` or `python` row is missing).
+        case_records: list[tuple[str, bool | None, dict[str, dict[str, str]]]] = []
+        for case in case_orders[op]:
+            recs = {
+                lang: per_lang.get(lang, {}).get(case, {}) for lang in present_langs
+            }
+            mojo_val = recs.get("mojo", {}).get("result")
+            py_val = recs.get("python", {}).get("result")
+            if mojo_val is None or py_val is None:
+                py_match = None
+            else:
+                py_match = _values_equal(mojo_val, py_val)
+            case_records.append((case, py_match, recs))
+
+        time_header = ["case", "match py"] + [
+            LANG_LABEL.get(l, l) for l in present_langs
+        ]
+        for lang in present_ratio_pairs:
+            time_header.append(f"dm/{ratio_short.get(lang, lang)}")
+        time_body: list[list[str]] = []
+        for case, py_match, recs in case_records:
+            if py_match is None:
+                py_cell = "N/A"
+            elif py_match:
+                py_cell = "OK"
+            else:
+                py_cell = "DIFF"
+            row = [_short_name(case), py_cell]
+            for lang in present_langs:
+                row.append(
+                    fmt_num(recs[lang].get("ns_per_iter") if recs[lang] else None)
+                )
+            for lang in present_ratio_pairs:
+                m = recs.get("mojo", {}).get("ns_per_iter")
+                r = recs.get(lang, {}).get("ns_per_iter")
+                row.append(fmt_ratio(m, r))
+            time_body.append(row)
+        lines.extend(render_aligned_table(time_header, time_body))
+        lines.append("")
+
+        # A case is shown in the DIFF block iff decimo genuinely disagrees
+        # with the Python oracle (`py_match is False`). Cases that were not
+        # checked (`py_match is None`, e.g. a harness was skipped) are
+        # excluded — they are not mismatches.
+        diffs = [t for t in case_records if t[1] is False]
+        if diffs:
+            lines.append(
+                f"<details><summary>{len(diffs)} DIFF case(s) at "
+                f"<code>{op}</code> — click to expand</summary>"
+            )
+            lines.append("")
+            for case, _py_match, recs in diffs:
+                lines.append(f"**{case}**")
+                lines.append("")
+                pairs: list[tuple[str, str | None]] = []
+                for lang in args.langs:
+                    rec = recs.get(lang, {}) if lang in present_langs else {}
+                    r = rec.get("result") if rec else None
+                    pairs.append((LANG_LABEL.get(lang, lang), r))
+                folded = _fold_diff_results([p[1] for p in pairs])
+                lines.append("```")
+                for (label, _), shown in zip(pairs, folded):
+                    if shown is None:
+                        lines.append(f"{label}: (no row)")
+                    else:
+                        lines.append(f"{label}: {shown}")
+                lines.append("```")
+                lines.append("")
+            lines.append("</details>")
+            lines.append("")
+
+    # ----- Section 3: agreement summary -----
+    lines.append("## 3. decimo-vs-python agreement summary")
+    lines.append("")
+    eq_header = ["op", "checked", "matched", "mismatched", "match %"]
+    eq_rows: list[list[str]] = []
+    for op in args.ops:
+        # Only count cases where BOTH `mojo` and `python` results are
+        # present; cases missing either side were not checked and must not
+        # inflate the mismatch count. `match %` is `-` when nothing was
+        # checked (e.g. the Python harness was skipped).
+        checked = 0
+        matched = 0
+        for case in case_orders[op]:
+            per_lang = records[op]
+            mojo_val = per_lang.get("mojo", {}).get(case, {}).get("result")
+            py_val = per_lang.get("python", {}).get(case, {}).get("result")
+            if mojo_val is None or py_val is None:
+                continue
+            checked += 1
+            if _values_equal(mojo_val, py_val):
+                matched += 1
+        pct = f"{(100.0 * matched / checked):.1f}%" if checked else "-"
+        eq_rows.append([op, str(checked), str(matched), str(checked - matched), pct])
+    lines.extend(render_aligned_table(eq_header, eq_rows))
+
+    report = "\n".join(lines) + "\n"
+    sys.stdout.write(report)
+    with open(out_path, "w") as f:
+        f.write(report)
+    print(f"\n>>> Wrote {out_path}", file=sys.stderr)
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/benches/bigint/bench.mojo b/benches/bigint/bench.mojo
deleted file mode 100644
index 3542f809..00000000
--- a/benches/bigint/bench.mojo
+++ /dev/null
@@ -1,67 +0,0 @@
-"""Unified BigInt benchmark runner. Compares BigInt10 vs BigInt vs Python int."""
-
-from bench_add import main as bench_add
-from bench_multiply import main as bench_multiply
-from bench_floor_divide import main as bench_floor_divide
-from bench_truncate_divide import main as bench_truncate_divide
-from bench_sqrt import main as bench_sqrt
-from bench_power import main as bench_power
-from bench_from_string import main as bench_from_string
-from bench_to_string import main as bench_to_string
-from bench_shift import main as bench_shift
-
-
-def main() raises:
-    while True:
-        print(
-            """
-=========================================
-  BigInt Benchmarks (BigInt10 vs BigInt)
-=========================================
-add:       Addition
-mul:       Multiplication
-fdiv:      Floor Division
-tdiv:      Truncate Division
-sqrt:      Integer Square Root (BigUInt vs BigInt)
-power:     Power / Exponentiation
-fromstr:   String → BigInt construction
-tostr:     BigInt → String conversion
-shift:     Left Shift (BigInt only)
-all:       Run all benchmarks
-q:         Exit
-=========================================
-"""
-        )
-        var command = input("Type name of bench you want to run: ")
-        if command == "add":
-            bench_add()
-        elif command == "mul":
-            bench_multiply()
-        elif command == "fdiv":
-            bench_floor_divide()
-        elif command == "tdiv":
-            bench_truncate_divide()
-        elif command == "sqrt":
-            bench_sqrt()
-        elif command == "power":
-            bench_power()
-        elif command == "fromstr":
-            bench_from_string()
-        elif command == "tostr":
-            bench_to_string()
-        elif command == "shift":
-            bench_shift()
-        elif command == "all":
-            bench_add()
-            bench_multiply()
-            bench_floor_divide()
-            bench_truncate_divide()
-            bench_sqrt()
-            bench_power()
-            bench_from_string()
-            bench_to_string()
-            bench_shift()
-        elif command == "q":
-            return
-        else:
-            print("Invalid input")
diff --git a/benches/bigint/bench_add.mojo b/benches/bigint/bench_add.mojo
deleted file mode 100644
index d9e8f831..00000000
--- a/benches/bigint/bench_add.mojo
+++ /dev/null
@@ -1,124 +0,0 @@
-"""Benchmarks for BigInt addition. Compares BigInt10, BigInt, and Python int."""
-
-from decimo.bigint10.bigint10 import BigInt10
-import decimo.bigint10.arithmetics
-from decimo.bigint.bigint import BigInt
-import decimo.bigint.arithmetics
-from decimo.tests import (
-    BenchCase,
-    load_bench_cases,
-    load_bench_iterations,
-    open_log_file,
-    log_print,
-    print_header,
-    print_summary_dual,
-)
-from std.python import Python, PythonObject
-from std.time import perf_counter_ns
-from std.collections import List
-
-
-def run_case(
-    bc: BenchCase,
-    iterations: Int,
-    log_file: PythonObject,
-    mut sf_bigint10: List[Float64],
-    mut sf_bigint: List[Float64],
-) raises:
-    log_print("\nBenchmark:       " + bc.name, log_file)
-    log_print("a: " + bc.a[byte=:80], log_file)
-    log_print("b: " + bc.b[byte=:80], log_file)
-
-    var m1a = BigInt10(bc.a)
-    var m1b = BigInt10(bc.b)
-    var m2a = BigInt(bc.a)
-    var m2b = BigInt(bc.b)
-    var py = Python.import_module("builtins")
-    var pa = py.int(bc.a)
-    var pb = py.int(bc.b)
-
-    try:
-        var r1 = m1a + m1b
-        var r2 = m2a + m2b
-        var rp = pa + pb
-
-        var r1_str = String(r1)
-        var r2_str = String(r2)
-        var rp_str = String(rp)
-
-        # Correctness check: string match
-        if r1_str != rp_str or r2_str != rp_str:
-            log_print("*** WARNING: String mismatch detected! ***", log_file)
-            log_print("BigInt10 result: " + r1_str[byte=:120], log_file)
-            log_print("BigInt result:  " + r2_str[byte=:120], log_file)
-            log_print("Python result:   " + rp_str[byte=:120], log_file)
-
-        var t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = m1a + m1b
-        var t1 = (perf_counter_ns() - t0) / UInt(iterations)
-        if t1 == 0:
-            t1 = 1
-
-        t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = m2a + m2b
-        var t2 = (perf_counter_ns() - t0) / UInt(iterations)
-        if t2 == 0:
-            t2 = 1
-
-        t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = pa + pb
-        var tp = (perf_counter_ns() - t0) / UInt(iterations)
-
-        var s1 = Float64(tp) / Float64(t1)
-        var s2 = Float64(tp) / Float64(t2)
-        sf_bigint10.append(s1)
-        sf_bigint.append(s2)
-
-        log_print("BigInt10:        " + String(t1) + " ns/iter", log_file)
-        log_print("BigInt:         " + String(t2) + " ns/iter", log_file)
-        log_print("Python:          " + String(tp) + " ns/iter", log_file)
-        log_print("BigInt10 speedup:" + String(s1) + "×", log_file)
-        log_print("BigInt speedup: " + String(s2) + "×", log_file)
-    except e:
-        log_print("Error: " + String(e), log_file)
-        log_print("Skipping this case", log_file)
-
-
-def main() raises:
-    var pysys = Python.import_module("sys")
-    pysys.set_int_max_str_digits(10000000)
-
-    var log_file = open_log_file("benchmark_bigint_add")
-    print_header("Decimo BigInt Addition Benchmark", log_file)
-
-    var cases = load_bench_cases("bench_data/add.toml")
-    var iterations = load_bench_iterations("bench_data/add.toml")
-    var sf1 = List[Float64]()
-    var sf2 = List[Float64]()
-
-    log_print(
-        "\nRunning "
-        + String(len(cases))
-        + " addition benchmarks with "
-        + String(iterations)
-        + " iterations each",
-        log_file,
-    )
-
-    for i in range(len(cases)):
-        run_case(cases[i], iterations, log_file, sf1, sf2)
-
-    print_summary_dual(
-        "BigInt Addition Benchmark Summary",
-        sf1,
-        "BigInt10",
-        sf2,
-        "BigInt",
-        iterations,
-        log_file,
-    )
-    log_file.close()
-    print("Benchmark completed. Log file closed.")
diff --git a/benches/bigint/bench_data/truncate_divide.toml b/benches/bigint/bench_data/truncate_divide.toml
deleted file mode 100644
index 11dad594..00000000
--- a/benches/bigint/bench_data/truncate_divide.toml
+++ /dev/null
@@ -1,117 +0,0 @@
-[config]
-iterations = 100
-
-[[cases]]
-a = "100"
-b = "10"
-name = "Simple division, no remainder"
-
-[[cases]]
-a = "10"
-b = "3"
-name = "Division with remainder"
-
-[[cases]]
-a = "7"
-b = "2"
-name = "Division of small numbers"
-
-[[cases]]
-a = "5"
-b = "10"
-name = "Division resulting in zero"
-
-[[cases]]
-a = "12345"
-b = "1"
-name = "Division by one"
-
-[[cases]]
-a = "-10"
-b = "3"
-name = "Negative dividend, positive divisor"
-
-[[cases]]
-a = "10"
-b = "-3"
-name = "Positive dividend, negative divisor"
-
-[[cases]]
-a = "-10"
-b = "-3"
-name = "Negative dividend, negative divisor"
-
-[[cases]]
-a = "0"
-b = "5"
-name = "Zero dividend"
-
-[[cases]]
-a = "9999999999"
-b = "333"
-name = "Large number division"
-
-[[cases]]
-a = "1{0,50}"
-b = "7"
-name = "Very large number division"
-
-[[cases]]
-a = "1{0,30}"
-b = "1{0,10}"
-name = "Division of large numbers with exact result"
-
-[[cases]]
-a = "12345"
-b = "{9,20}"
-name = "Division by large number"
-
-[[cases]]
-a = "6765"
-b = "4181"
-name = "Fibonacci number division"
-
-[[cases]]
-a = "2147483647"
-b = "997"
-name = "Prime number division"
-
-[[cases]]
-a = "9223372036854775807"
-b = "2"
-name = "Division near Int64 limit"
-
-[[cases]]
-a = "{12345,10}"
-b = "{6789,12}"
-name = "Division with around 50 digits divisor just below dividend"
-
-[[cases]]
-a = "1{0,20}"
-b = "1{0,5}"
-name = "Division with exact powers of 10"
-
-[[cases]]
-a = "{990132857498314692374162398217,10}"
-b = "{85172390413429847239,10}"
-name = "Division of repeated digits"
-
-[[cases]]
-a = "{9,100}"
-b = "3"
-name = "Extreme large dividend and small divisor"
-
-[[cases]]
-a = "{9,1000}"
-b = "{3,500}"
-name = "1000-digit dividend / 500-digit divisor"
-
-[[cases]]
-a = "{9,5000}"
-b = "{7,2500}"
-name = "5000-digit dividend / 2500-digit divisor"
-
-[[cases]]
-a = "{9,10000}"
-b = "{7,5000}"
-name = "10000-digit dividend / 5000-digit divisor"
diff --git a/benches/bigint/bench_floor_divide.mojo b/benches/bigint/bench_floor_divide.mojo
deleted file mode 100644
index e33442fb..00000000
--- a/benches/bigint/bench_floor_divide.mojo
+++ /dev/null
@@ -1,124 +0,0 @@
-"""Benchmarks for BigInt floor division. Compares BigInt10, BigInt, and Python int."""
-
-from decimo.bigint10.bigint10 import BigInt10
-import decimo.bigint10.arithmetics
-from decimo.bigint.bigint import BigInt
-import decimo.bigint.arithmetics
-from decimo.tests import (
-    BenchCase,
-    load_bench_cases,
-    load_bench_iterations,
-    open_log_file,
-    log_print,
-    print_header,
-    print_summary_dual,
-)
-from std.python import Python, PythonObject
-from std.time import perf_counter_ns
-from std.collections import List
-
-
-def run_case(
-    bc: BenchCase,
-    iterations: Int,
-    log_file: PythonObject,
-    mut sf_bigint10: List[Float64],
-    mut sf_bigint: List[Float64],
-) raises:
-    log_print("\nBenchmark:       " + bc.name, log_file)
-    log_print("a: " + bc.a[byte=:80], log_file)
-    log_print("b: " + bc.b[byte=:80], log_file)
-
-    var m1a = BigInt10(bc.a)
-    var m1b = BigInt10(bc.b)
-    var m2a = BigInt(bc.a)
-    var m2b = BigInt(bc.b)
-    var py = Python.import_module("builtins")
-    var pa = py.int(bc.a)
-    var pb = py.int(bc.b)
-
-    try:
-        var r1 = decimo.bigint10.arithmetics.floor_divide(m1a, m1b)
-        var r2 = decimo.bigint.arithmetics.floor_divide(m2a, m2b)
-        var rp = pa // pb
-
-        var r1_str = String(r1)
-        var r2_str = String(r2)
-        var rp_str = String(rp)
-
-        # Correctness check: string match
-        if r1_str != rp_str or r2_str != rp_str:
-            log_print("*** WARNING: String mismatch detected! ***", log_file)
-            log_print("BigInt10 result: " + r1_str[byte=:80], log_file)
-            log_print("BigInt result:  " + r2_str[byte=:80], log_file)
-            log_print("Python result:   " + rp_str[byte=:80], log_file)
-
-        var t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = decimo.bigint10.arithmetics.floor_divide(m1a, m1b)
-        var t1 = (perf_counter_ns() - t0) / UInt(iterations)
-        if t1 == 0:
-            t1 = 1
-
-        t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = decimo.bigint.arithmetics.floor_divide(m2a, m2b)
-        var t2 = (perf_counter_ns() - t0) / UInt(iterations)
-        if t2 == 0:
-            t2 = 1
-
-        t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = pa // pb
-        var tp = (perf_counter_ns() - t0) / UInt(iterations)
-
-        var s1 = Float64(tp) / Float64(t1)
-        var s2 = Float64(tp) / Float64(t2)
-        sf_bigint10.append(s1)
-        sf_bigint.append(s2)
-
-        log_print("BigInt10:        " + String(t1) + " ns/iter", log_file)
-        log_print("BigInt:         " + String(t2) + " ns/iter", log_file)
-        log_print("Python:          " + String(tp) + " ns/iter", log_file)
-        log_print("BigInt10 speedup:" + String(s1) + "×", log_file)
-        log_print("BigInt speedup: " + String(s2) + "×", log_file)
-    except e:
-        log_print("Error: " + String(e), log_file)
-        log_print("Skipping this case", log_file)
-
-
-def main() raises:
-    var pysys = Python.import_module("sys")
-    pysys.set_int_max_str_digits(10000000)
-
-    var log_file = open_log_file("benchmark_bigint_floor_divide")
-    print_header("Decimo BigInt Floor Division Benchmark", log_file)
-
-    var cases = load_bench_cases("bench_data/floor_divide.toml")
-    var iterations = load_bench_iterations("bench_data/floor_divide.toml")
-    var sf1 = List[Float64]()
-    var sf2 = List[Float64]()
-
-    log_print(
-        "\nRunning "
-        + String(len(cases))
-        + " floor division benchmarks with "
-        + String(iterations)
-        + " iterations each",
-        log_file,
-    )
-
-    for i in range(len(cases)):
-        run_case(cases[i], iterations, log_file, sf1, sf2)
-
-    print_summary_dual(
-        "BigInt Floor Division Benchmark Summary",
-        sf1,
-        "BigInt10",
-        sf2,
-        "BigInt",
-        iterations,
-        log_file,
-    )
-    log_file.close()
-    print("Benchmark completed. Log file closed.")
diff --git a/benches/bigint/bench_from_string.mojo b/benches/bigint/bench_from_string.mojo
deleted file mode 100644
index a862a1a5..00000000
--- a/benches/bigint/bench_from_string.mojo
+++ /dev/null
@@ -1,103 +0,0 @@
-"""Benchmarks for BigInt from_string construction. Compares BigInt10, BigInt, and Python int."""
-
-from decimo.bigint10.bigint10 import BigInt10
-from decimo.bigint.bigint import BigInt
-from decimo.tests import (
-    BenchCase,
-    load_bench_cases,
-    load_bench_iterations,
-    open_log_file,
-    log_print,
-    print_header,
-    print_summary_dual,
-)
-from std.python import Python, PythonObject
-from std.time import perf_counter_ns
-from std.collections import List
-
-
-def run_case(
-    bc: BenchCase,
-    iterations: Int,
-    log_file: PythonObject,
-    mut sf_bigint10: List[Float64],
-    mut sf_bigint: List[Float64],
-) raises:
-    log_print("\nBenchmark:       " + bc.name, log_file)
-    log_print(
-        "a: " + bc.a[byte=:80] + (" ..." if len(bc.a) > 80 else ""), log_file
-    )
-    log_print("digits:          " + String(len(bc.a)), log_file)
-
-    var py = Python.import_module("builtins")
-
-    try:
-        var t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = BigInt10(bc.a)
-        var t1 = (perf_counter_ns() - t0) / UInt(iterations)
-        if t1 == 0:
-            t1 = 1
-
-        t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = BigInt(bc.a)
-        var t2 = (perf_counter_ns() - t0) / UInt(iterations)
-        if t2 == 0:
-            t2 = 1
-
-        t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = py.int(bc.a)
-        var tp = (perf_counter_ns() - t0) / UInt(iterations)
-
-        var s1 = Float64(tp) / Float64(t1)
-        var s2 = Float64(tp) / Float64(t2)
-        sf_bigint10.append(s1)
-        sf_bigint.append(s2)
-
-        log_print("BigInt10:        " + String(t1) + " ns/iter", log_file)
-        log_print("BigInt:         " + String(t2) + " ns/iter", log_file)
-        log_print("Python:          " + String(tp) + " ns/iter", log_file)
-        log_print("BigInt10 speedup:" + String(s1) + "×", log_file)
-        log_print("BigInt speedup: " + String(s2) + "×", log_file)
-    except e:
-        log_print("Error: " + String(e), log_file)
-        log_print("Skipping this case", log_file)
-
-
-def main() raises:
-    var pysys = Python.import_module("sys")
-    pysys.set_int_max_str_digits(10000000)
-
-    var log_file = open_log_file("benchmark_bigint_from_string")
-    print_header("Decimo BigInt from_string Benchmark", log_file)
-
-    var cases = load_bench_cases("bench_data/from_string.toml")
-    var iterations = load_bench_iterations("bench_data/from_string.toml")
-    var sf1 = List[Float64]()
-    var sf2 = List[Float64]()
-
-    log_print(
-        "\nRunning "
-        + String(len(cases))
-        + " from_string benchmarks with "
-        + String(iterations)
-        + " iterations each",
-        log_file,
-    )
-
-    for i in range(len(cases)):
-        run_case(cases[i], iterations, log_file, sf1, sf2)
-
-    print_summary_dual(
-        "BigInt from_string Benchmark Summary",
-        sf1,
-        "BigInt10",
-        sf2,
-        "BigInt",
-        iterations,
-        log_file,
-    )
-    log_file.close()
-    print("Benchmark completed. Log file closed.")
diff --git a/benches/bigint/bench_multiply.mojo b/benches/bigint/bench_multiply.mojo
deleted file mode 100644
index 1775adcb..00000000
--- a/benches/bigint/bench_multiply.mojo
+++ /dev/null
@@ -1,124 +0,0 @@
-"""Benchmarks for BigInt multiplication. Compares BigInt10, BigInt, and Python int."""
-
-from decimo.bigint10.bigint10 import BigInt10
-import decimo.bigint10.arithmetics
-from decimo.bigint.bigint import BigInt
-import decimo.bigint.arithmetics
-from decimo.tests import (
-    BenchCase,
-    load_bench_cases,
-    load_bench_iterations,
-    open_log_file,
-    log_print,
-    print_header,
-    print_summary_dual,
-)
-from std.python import Python, PythonObject
-from std.time import perf_counter_ns
-from std.collections import List
-
-
-def run_case(
-    bc: BenchCase,
-    iterations: Int,
-    log_file: PythonObject,
-    mut sf_bigint10: List[Float64],
-    mut sf_bigint: List[Float64],
-) raises:
-    log_print("\nBenchmark:       " + bc.name, log_file)
-    log_print("a: " + bc.a[byte=:80], log_file)
-    log_print("b: " + bc.b[byte=:80], log_file)
-
-    var m1a = BigInt10(bc.a)
-    var m1b = BigInt10(bc.b)
-    var m2a = BigInt(bc.a)
-    var m2b = BigInt(bc.b)
-    var py = Python.import_module("builtins")
-    var pa = py.int(bc.a)
-    var pb = py.int(bc.b)
-
-    try:
-        var r1 = m1a * m1b
-        var r2 = m2a * m2b
-        var rp = pa * pb
-
-        var r1_str = String(r1)
-        var r2_str = String(r2)
-        var rp_str = String(rp)
-
-        # Correctness check: string match
-        if r1_str != rp_str or r2_str != rp_str:
-            log_print("*** WARNING: String mismatch detected! ***", log_file)
-            log_print("BigInt10 result: " + r1_str[byte=:80], log_file)
-            log_print("BigInt result:  " + r2_str[byte=:80], log_file)
-            log_print("Python result:   " + rp_str[byte=:80], log_file)
-
-        var t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = m1a * m1b
-        var t1 = (perf_counter_ns() - t0) / UInt(iterations)
-        if t1 == 0:
-            t1 = 1
-
-        t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = m2a * m2b
-        var t2 = (perf_counter_ns() - t0) / UInt(iterations)
-        if t2 == 0:
-            t2 = 1
-
-        t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = pa * pb
-        var tp = (perf_counter_ns() - t0) / UInt(iterations)
-
-        var s1 = Float64(tp) / Float64(t1)
-        var s2 = Float64(tp) / Float64(t2)
-        sf_bigint10.append(s1)
-        sf_bigint.append(s2)
-
-        log_print("BigInt10:        " + String(t1) + " ns/iter", log_file)
-        log_print("BigInt:         " + String(t2) + " ns/iter", log_file)
-        log_print("Python:          " + String(tp) + " ns/iter", log_file)
-        log_print("BigInt10 speedup:" + String(s1) + "×", log_file)
-        log_print("BigInt speedup: " + String(s2) + "×", log_file)
-    except e:
-        log_print("Error: " + String(e), log_file)
-        log_print("Skipping this case", log_file)
-
-
-def main() raises:
-    var pysys = Python.import_module("sys")
-    pysys.set_int_max_str_digits(10000000)
-
-    var log_file = open_log_file("benchmark_bigint_multiply")
-    print_header("Decimo BigInt Multiplication Benchmark", log_file)
-
-    var cases = load_bench_cases("bench_data/multiply.toml")
-    var iterations = load_bench_iterations("bench_data/multiply.toml")
-    var sf1 = List[Float64]()
-    var sf2 = List[Float64]()
-
-    log_print(
-        "\nRunning "
-        + String(len(cases))
-        + " multiplication benchmarks with "
-        + String(iterations)
-        + " iterations each",
-        log_file,
-    )
-
-    for i in range(len(cases)):
-        run_case(cases[i], iterations, log_file, sf1, sf2)
-
-    print_summary_dual(
-        "BigInt Multiplication Benchmark Summary",
-        sf1,
-        "BigInt10",
-        sf2,
-        "BigInt",
-        iterations,
-        log_file,
-    )
-    log_file.close()
-    print("Benchmark completed. Log file closed.")
diff --git a/benches/bigint/bench_power.mojo b/benches/bigint/bench_power.mojo
deleted file mode 100644
index 89675b70..00000000
--- a/benches/bigint/bench_power.mojo
+++ /dev/null
@@ -1,122 +0,0 @@
-"""Benchmarks for BigInt exponentiation. Compares BigInt10, BigInt, and Python int."""
-
-from decimo.bigint10.bigint10 import BigInt10
-from decimo.bigint.bigint import BigInt
-import decimo.bigint.arithmetics
-from decimo.tests import (
-    BenchCase,
-    load_bench_cases,
-    load_bench_iterations,
-    open_log_file,
-    log_print,
-    print_header,
-    print_summary_dual,
-)
-from std.python import Python, PythonObject
-from std.time import perf_counter_ns
-from std.collections import List
-
-
-def run_case(
-    bc: BenchCase,
-    iterations: Int,
-    log_file: PythonObject,
-    mut sf_bigint10: List[Float64],
-    mut sf_bigint: List[Float64],
-) raises:
-    log_print("\nBenchmark:       " + bc.name, log_file)
-    log_print("base: " + bc.a[byte=:80], log_file)
-    log_print("exp:  " + bc.b[byte=:80], log_file)
-
-    var m1_base = BigInt10(bc.a)
-    var base = BigInt(bc.a)
-    var exp_int = Int(BigInt(bc.b))
-    var py = Python.import_module("builtins")
-    var pa = py.int(bc.a)
-    var pb = py.int(bc.b)
-
-    try:
-        var r1 = m1_base.power(exp_int)
-        var r2 = base**exp_int
-        var rp = pa**pb
-
-        var r1_str = String(r1)
-        var r2_str = String(r2)
-        var rp_str = String(rp)
-
-        # Correctness check: string match
-        if r1_str != rp_str or r2_str != rp_str:
-            log_print("*** WARNING: String mismatch detected! ***", log_file)
-            log_print("BigInt10 result: " + r1_str[byte=:80], log_file)
-            log_print("BigInt result:  " + r2_str[byte=:80], log_file)
-            log_print("Python result:   " + rp_str[byte=:80], log_file)
-
-        var t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = m1_base.power(exp_int)
-        var t1 = (perf_counter_ns() - t0) / UInt(iterations)
-        if t1 == 0:
-            t1 = 1
-
-        t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = base**exp_int
-        var t2 = (perf_counter_ns() - t0) / UInt(iterations)
-        if t2 == 0:
-            t2 = 1
-
-        t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = pa**pb
-        var tp = (perf_counter_ns() - t0) / UInt(iterations)
-
-        var s1 = Float64(tp) / Float64(t1)
-        var s2 = Float64(tp) / Float64(t2)
-        sf_bigint10.append(s1)
-        sf_bigint.append(s2)
-
-        log_print("BigInt10:        " + String(t1) + " ns/iter", log_file)
-        log_print("BigInt:         " + String(t2) + " ns/iter", log_file)
-        log_print("Python:          " + String(tp) + " ns/iter", log_file)
-        log_print("BigInt10 speedup:" + String(s1) + "×", log_file)
-        log_print("BigInt speedup: " + String(s2) + "×", log_file)
-    except e:
-        log_print("Error: " + String(e), log_file)
-        log_print("Skipping this case", log_file)
-
-
-def main() raises:
-    var pysys = Python.import_module("sys")
-    pysys.set_int_max_str_digits(10000000)
-
-    var log_file = open_log_file("benchmark_bigint_power")
-    print_header("Decimo BigInt Power Benchmark", log_file)
-
-    var cases = load_bench_cases("bench_data/power.toml")
-    var iterations = load_bench_iterations("bench_data/power.toml")
-    var sf1 = List[Float64]()
-    var sf2 = List[Float64]()
-
-    log_print(
-        "\nRunning "
-        + String(len(cases))
-        + " power benchmarks with "
-        + String(iterations)
-        + " iterations each",
-        log_file,
-    )
-
-    for i in range(len(cases)):
-        run_case(cases[i], iterations, log_file, sf1, sf2)
-
-    print_summary_dual(
-        "BigInt Power Benchmark Summary",
-        sf1,
-        "BigInt10",
-        sf2,
-        "BigInt",
-        iterations,
-        log_file,
-    )
-    log_file.close()
-    print("Benchmark completed. Log file closed.")
diff --git a/benches/bigint/bench_shift.mojo b/benches/bigint/bench_shift.mojo
deleted file mode 100644
index 01929d1a..00000000
--- a/benches/bigint/bench_shift.mojo
+++ /dev/null
@@ -1,104 +0,0 @@
-"""Benchmarks for BigInt left shift. Compares BigInt vs Python int."""
-
-from decimo.bigint.bigint import BigInt
-import decimo.bigint.arithmetics
-from decimo.tests import (
-    BenchCase,
-    load_bench_cases,
-    load_bench_iterations,
-    open_log_file,
-    log_print,
-    print_header,
-    print_summary,
-)
-from std.python import Python, PythonObject
-from std.time import perf_counter_ns
-from std.collections import List
-
-
-def run_case(
-    bc: BenchCase,
-    iterations: Int,
-    log_file: PythonObject,
-    mut sf: List[Float64],
-) raises:
-    log_print("\nBenchmark:       " + bc.name, log_file)
-    log_print(
-        "a: " + bc.a[byte=:80] + (" ..." if len(bc.a) > 80 else ""), log_file
-    )
-    log_print("shift: " + bc.b, log_file)
-
-    var m2a = BigInt(bc.a)
-    var shift = Int(BigInt(bc.b))
-    var py = Python.import_module("builtins")
-    var pa = py.int(bc.a)
-    var pb = py.int(bc.b)
-
-    try:
-        var r2 = m2a << shift
-        var rp = pa << pb
-
-        var r2_str = String(r2)
-        var rp_str = String(rp)
-
-        # Correctness check: string match
-        if r2_str != rp_str:
-            log_print("*** WARNING: String mismatch detected! ***", log_file)
-            log_print("BigInt result:  " + r2_str[byte=:80], log_file)
-            log_print("Python result:   " + rp_str[byte=:80], log_file)
-
-        var t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = m2a << shift
-        var t2 = (perf_counter_ns() - t0) / UInt(iterations)
-        if t2 == 0:
-            t2 = 1
-
-        t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = pa << pb
-        var tp = (perf_counter_ns() - t0) / UInt(iterations)
-
-        var s2 = Float64(tp) / Float64(t2)
-        sf.append(s2)
-
-        log_print("BigInt:         " + String(t2) + " ns/iter", log_file)
-        log_print("Python:          " + String(tp) + " ns/iter", log_file)
-        log_print("Speedup:         " + String(s2) + "×", log_file)
-    except e:
-        log_print("Error: " + String(e), log_file)
-        log_print("Skipping this case", log_file)
-
-
-def main() raises:
-    var pysys = Python.import_module("sys")
-    pysys.set_int_max_str_digits(10000000)
-
-    var log_file = open_log_file("benchmark_bigint_shift")
-    print_header("Decimo BigInt Left Shift Benchmark", log_file)
-
-    var cases = load_bench_cases("bench_data/shift.toml")
-    var iterations = load_bench_iterations("bench_data/shift.toml")
-    var sf = List[Float64]()
-
-    log_print(
-        "\nRunning "
-        + String(len(cases))
-        + " shift benchmarks with "
-        + String(iterations)
-        + " iterations each",
-        log_file,
-    )
-
-    for i in range(len(cases)):
-        run_case(cases[i], iterations, log_file, sf)
-
-    print_summary(
-        "BigInt Left Shift Benchmark Summary",
-        sf,
-        "BigInt",
-        iterations,
-        log_file,
-    )
-    log_file.close()
-    print("Benchmark completed. Log file closed.")
diff --git a/benches/bigint/bench_sqrt.mojo b/benches/bigint/bench_sqrt.mojo
deleted file mode 100644
index d83ef224..00000000
--- a/benches/bigint/bench_sqrt.mojo
+++ /dev/null
@@ -1,123 +0,0 @@
-"""Benchmarks for BigInt integer square root. Compares BigUInt, BigInt, and Python isqrt."""
-
-from decimo.biguint.biguint import BigUInt
-from decimo.bigint.bigint import BigInt
-import decimo.bigint.arithmetics
-import decimo.bigint.exponential
-from decimo.tests import (
-    BenchCase,
-    load_bench_cases,
-    load_bench_iterations,
-    open_log_file,
-    log_print,
-    print_header,
-    print_summary_dual,
-)
-from std.python import Python, PythonObject
-from std.time import perf_counter_ns
-from std.collections import List
-
-
-def run_case(
-    bc: BenchCase,
-    iterations: Int,
-    log_file: PythonObject,
-    mut sf_biguint: List[Float64],
-    mut sf_bigint: List[Float64],
-) raises:
-    log_print("\nBenchmark:       " + bc.name, log_file)
-    log_print(
-        "a: " + bc.a[byte=:80] + (" ..." if len(bc.a) > 80 else ""), log_file
-    )
-
-    var m1a = BigUInt(bc.a)
-    var m2a = BigInt(bc.a)
-    var py = Python.import_module("builtins")
-    var math_mod = Python.import_module("math")
-    var pa = py.int(bc.a)
-
-    try:
-        var r1 = m1a.sqrt()
-        var r2 = m2a.sqrt()
-        var rp = math_mod.isqrt(pa)
-
-        var r1_str = String(r1)
-        var r2_str = String(r2)
-        var rp_str = String(rp)
-
-        # Correctness check: string match
-        if r1_str != rp_str or r2_str != rp_str:
-            log_print("*** WARNING: String mismatch detected! ***", log_file)
-            log_print("BigUInt result:  " + r1_str[byte=:80], log_file)
-            log_print("BigInt result:  " + r2_str[byte=:80], log_file)
-            log_print("Python result:   " + rp_str[byte=:80], log_file)
-
-        var t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = m1a.sqrt()
-        var t1 = (perf_counter_ns() - t0) / UInt(iterations)
-        if t1 == 0:
-            t1 = 1
-
-        t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = m2a.sqrt()
-        var t2 = (perf_counter_ns() - t0) / UInt(iterations)
-        if t2 == 0:
-            t2 = 1
-
-        t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = math_mod.isqrt(pa)
-        var tp = (perf_counter_ns() - t0) / UInt(iterations)
-
-        var s1 = Float64(tp) / Float64(t1)
-        var s2 = Float64(tp) / Float64(t2)
-        sf_biguint.append(s1)
-        sf_bigint.append(s2)
-
-        log_print("BigUInt:         " + String(t1) + " ns/iter", log_file)
-        log_print("BigInt:         " + String(t2) + " ns/iter", log_file)
-        log_print("Python:          " + String(tp) + " ns/iter", log_file)
-        log_print("BigUInt speedup: " + String(s1) + "×", log_file)
-        log_print("BigInt speedup: " + String(s2) + "×", log_file)
-    except e:
-        log_print("Error: " + String(e), log_file)
-        log_print("Skipping this case", log_file)
-
-
-def main() raises:
-    var pysys = Python.import_module("sys")
-    pysys.set_int_max_str_digits(10000000)
-
-    var log_file = open_log_file("benchmark_bigint_sqrt")
-    print_header("Decimo BigInt Square Root Benchmark", log_file)
-
-    var cases = load_bench_cases("bench_data/sqrt.toml")
-    var iterations = load_bench_iterations("bench_data/sqrt.toml")
-    var sf1 = List[Float64]()
-    var sf2 = List[Float64]()
-
-    log_print(
-        "\nRunning "
-        + String(len(cases))
-        + " sqrt benchmarks with "
-        + String(iterations)
-        + " iterations each",
-        log_file,
-    )
-
-    for i in range(len(cases)):
-        run_case(cases[i], iterations, log_file, sf1, sf2)
-
-    print_summary_dual(
-        "BigInt Square Root Benchmark Summary",
-        sf1,
-        "BigUInt",
-        sf2,
-        "BigInt",
-        iterations,
-        log_file,
-    )
-    log_file.close()
-    print("Benchmark completed. Log file closed.")
diff --git a/benches/bigint/bench_to_string.mojo b/benches/bigint/bench_to_string.mojo
deleted file mode 100644
index ecd12db2..00000000
--- a/benches/bigint/bench_to_string.mojo
+++ /dev/null
@@ -1,109 +0,0 @@
-"""Benchmarks for BigInt to_string conversion. Compares BigInt10, BigInt, and Python int."""
-
-from decimo.bigint10.bigint10 import BigInt10
-from decimo.bigint.bigint import BigInt
-from decimo.tests import (
-    BenchCase,
-    load_bench_cases,
-    load_bench_iterations,
-    open_log_file,
-    log_print,
-    print_header,
-    print_summary_dual,
-)
-from std.python import Python, PythonObject
-from std.time import perf_counter_ns
-from std.collections import List
-
-
-def run_case(
-    bc: BenchCase,
-    iterations: Int,
-    log_file: PythonObject,
-    mut sf_bigint10: List[Float64],
-    mut sf_bigint: List[Float64],
-) raises:
-    log_print("\nBenchmark:       " + bc.name, log_file)
-
-    var m1 = BigInt10(bc.a)
-    var m2 = BigInt(bc.a)
-    var py = Python.import_module("builtins")
-    var pa = py.int(bc.a)
-
-    log_print("digits:          " + String(len(bc.a)), log_file)
-
-    try:
-        # Verify results match
-        var _r1 = String(m1)
-        var _r2 = String(m2)
-        var _rp = String(py.str(pa))
-
-        var t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = String(m1)
-        var t1 = (perf_counter_ns() - t0) / UInt(iterations)
-        if t1 == 0:
-            t1 = 1
-
-        t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = String(m2)
-        var t2 = (perf_counter_ns() - t0) / UInt(iterations)
-        if t2 == 0:
-            t2 = 1
-
-        t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = py.str(pa)
-        var tp = (perf_counter_ns() - t0) / UInt(iterations)
-
-        var s1 = Float64(tp) / Float64(t1)
-        var s2 = Float64(tp) / Float64(t2)
-        sf_bigint10.append(s1)
-        sf_bigint.append(s2)
-
-        log_print("BigInt10:        " + String(t1) + " ns/iter", log_file)
-        log_print("BigInt:         " + String(t2) + " ns/iter", log_file)
-        log_print("Python:          " + String(tp) + " ns/iter", log_file)
-        log_print("BigInt10 speedup:" + String(s1) + "×", log_file)
-        log_print("BigInt speedup: " + String(s2) + "×", log_file)
-    except e:
-        log_print("Error: " + String(e), log_file)
-        log_print("Skipping this case", log_file)
-
-
-def main() raises:
-    var pysys = Python.import_module("sys")
-    pysys.set_int_max_str_digits(10000000)
-
-    var log_file = open_log_file("benchmark_bigint_to_string")
-    print_header("Decimo BigInt to_string Benchmark", log_file)
-
-    var cases = load_bench_cases("bench_data/to_string.toml")
-    var iterations = load_bench_iterations("bench_data/to_string.toml")
-    var sf1 = List[Float64]()
-    var sf2 = List[Float64]()
-
-    log_print(
-        "\nRunning "
-        + String(len(cases))
-        + " to_string benchmarks with "
-        + String(iterations)
-        + " iterations each",
-        log_file,
-    )
-
-    for i in range(len(cases)):
-        run_case(cases[i], iterations, log_file, sf1, sf2)
-
-    print_summary_dual(
-        "BigInt to_string Benchmark Summary",
-        sf1,
-        "BigInt10",
-        sf2,
-        "BigInt",
-        iterations,
-        log_file,
-    )
-    log_file.close()
-    print("Benchmark completed. Log file closed.")
diff --git a/benches/bigint/bench_truncate_divide.mojo b/benches/bigint/bench_truncate_divide.mojo
deleted file mode 100644
index 0836f9a6..00000000
--- a/benches/bigint/bench_truncate_divide.mojo
+++ /dev/null
@@ -1,124 +0,0 @@
-"""Benchmarks for BigInt truncate division. Compares BigInt10, BigInt, and Python int."""
-
-from decimo.bigint10.bigint10 import BigInt10
-import decimo.bigint10.arithmetics
-from decimo.bigint.bigint import BigInt
-import decimo.bigint.arithmetics
-from decimo.tests import (
-    BenchCase,
-    load_bench_cases,
-    load_bench_iterations,
-    open_log_file,
-    log_print,
-    print_header,
-    print_summary_dual,
-)
-from std.python import Python, PythonObject
-from std.time import perf_counter_ns
-from std.collections import List
-
-
-def run_case(
-    bc: BenchCase,
-    iterations: Int,
-    log_file: PythonObject,
-    mut sf_bigint10: List[Float64],
-    mut sf_bigint: List[Float64],
-) raises:
-    log_print("\nBenchmark:       " + bc.name, log_file)
-    log_print("a: " + bc.a[byte=:80], log_file)
-    log_print("b: " + bc.b[byte=:80], log_file)
-
-    var m1a = BigInt10(bc.a)
-    var m1b = BigInt10(bc.b)
-    var m2a = BigInt(bc.a)
-    var m2b = BigInt(bc.b)
-    var py = Python.import_module("builtins")
-    var pa = py.int(bc.a)
-    var pb = py.int(bc.b)
-
-    try:
-        var r1 = m1a.truncate_divide(m1b)
-        var r2 = m2a.truncate_divide(m2b)
-        var rp = pa // pb
-
-        var r1_str = String(r1)
-        var r2_str = String(r2)
-        var rp_str = String(rp)
-
-        # Correctness check: string match
-        if r1_str != rp_str or r2_str != rp_str:
-            log_print("*** WARNING: String mismatch detected! ***", log_file)
-            log_print("BigInt10 result: " + r1_str[byte=:120], log_file)
-            log_print("BigInt result:  " + r2_str[byte=:120], log_file)
-            log_print("Python result:   " + rp_str[byte=:120], log_file)
-
-        var t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = m1a.truncate_divide(m1b)
-        var t1 = (perf_counter_ns() - t0) / UInt(iterations)
-        if t1 == 0:
-            t1 = 1
-
-        t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = m2a.truncate_divide(m2b)
-        var t2 = (perf_counter_ns() - t0) / UInt(iterations)
-        if t2 == 0:
-            t2 = 1
-
-        t0 = perf_counter_ns()
-        for _ in range(iterations):
-            _ = pa // pb
-        var tp = (perf_counter_ns() - t0) / UInt(iterations)
-
-        var s1 = Float64(tp) / Float64(t1)
-        var s2 = Float64(tp) / Float64(t2)
-        sf_bigint10.append(s1)
-        sf_bigint.append(s2)
-
-        log_print("BigInt10:        " + String(t1) + " ns/iter", log_file)
-        log_print("BigInt:         " + String(t2) + " ns/iter", log_file)
-        log_print("Python:          " + String(tp) + " ns/iter", log_file)
-        log_print("BigInt10 speedup:" + String(s1) + "×", log_file)
-        log_print("BigInt speedup: " + String(s2) + "×", log_file)
-    except e:
-        log_print("Error: " + String(e), log_file)
-        log_print("Skipping this case", log_file)
-
-
-def main() raises:
-    var pysys = Python.import_module("sys")
-    pysys.set_int_max_str_digits(10000000)
-
-    var log_file = open_log_file("benchmark_bigint_truncate_divide")
-    print_header("Decimo BigInt Truncate Division Benchmark", log_file)
-
-    var cases = load_bench_cases("bench_data/truncate_divide.toml")
-    var iterations = load_bench_iterations("bench_data/truncate_divide.toml")
-    var sf1 = List[Float64]()
-    var sf2 = List[Float64]()
-
-    log_print(
-        "\nRunning "
-        + String(len(cases))
-        + " truncate division benchmarks with "
-        + String(iterations)
-        + " iterations each",
-        log_file,
-    )
-
-    for i in range(len(cases)):
-        run_case(cases[i], iterations, log_file, sf1, sf2)
-
-    print_summary_dual(
-        "BigInt Truncate Division Benchmark Summary",
-        sf1,
-        "BigInt10",
-        sf2,
-        "BigInt",
-        iterations,
-        log_file,
-    )
-    log_file.close()
-    print("Benchmark completed. Log file closed.")
diff --git a/benches/bigint/bench_data/add.toml b/benches/bigint/cases/add.toml
similarity index 100%
rename from benches/bigint/bench_data/add.toml
rename to benches/bigint/cases/add.toml
diff --git a/benches/bigint/bench_data/floor_divide.toml b/benches/bigint/cases/floor_divide.toml
similarity index 100%
rename from benches/bigint/bench_data/floor_divide.toml
rename to benches/bigint/cases/floor_divide.toml
diff --git a/benches/bigint/bench_data/from_string.toml b/benches/bigint/cases/from_string.toml
similarity index 100%
rename from benches/bigint/bench_data/from_string.toml
rename to benches/bigint/cases/from_string.toml
diff --git a/benches/bigint/bench_data/multiply.toml b/benches/bigint/cases/multiply.toml
similarity index 100%
rename from benches/bigint/bench_data/multiply.toml
rename to benches/bigint/cases/multiply.toml
diff --git a/benches/bigint/bench_data/power.toml b/benches/bigint/cases/power.toml
similarity index 100%
rename from benches/bigint/bench_data/power.toml
rename to benches/bigint/cases/power.toml
diff --git a/benches/bigint/bench_data/shift.toml b/benches/bigint/cases/shift.toml
similarity index 100%
rename from benches/bigint/bench_data/shift.toml
rename to benches/bigint/cases/shift.toml
diff --git a/benches/bigint/bench_data/sqrt.toml b/benches/bigint/cases/sqrt.toml
similarity index 100%
rename from benches/bigint/bench_data/sqrt.toml
rename to benches/bigint/cases/sqrt.toml
diff --git a/benches/bigint/bench_data/to_string.toml b/benches/bigint/cases/to_string.toml
similarity index 100%
rename from benches/bigint/bench_data/to_string.toml
rename to benches/bigint/cases/to_string.toml
diff --git a/benches/bigint/mojo/bench.mojo b/benches/bigint/mojo/bench.mojo
new file mode 100644
index 00000000..1c9620c8
--- /dev/null
+++ b/benches/bigint/mojo/bench.mojo
@@ -0,0 +1,296 @@
+# Cross-language BigInt benchmark — Mojo (decimo.BigInt) side.
+#
+# Reads cases/<op>.toml (shared across languages), expands {C,N} repeat
+# patterns, auto-tunes iteration count to ~50ms per case, and emits one CSV
+# record per case to logs/mojo_<op>_<ts>.csv. Schema (mirrors decimal128/):
+#
+#     timestamp,language,op,case_name,result,ns_per_iter
+#
+# Unlike the BigDecimal harness there is NO precision parameter: BigInt
+# arithmetic is always exact.
+#
+# Usage:
+#   pixi run mojo run -I ../../../src --debug-level=line-tables -D ASSERT=none \
+#       ./bench.mojo --op multiply --cases-dir ../cases --logs-dir ../logs
+#
+# Available ops: add, multiply, floor_divide, power, shift, sqrt,
+#                from_string, to_string.
+
+from decimo.bigint.bigint import BigInt
+import decimo.bigint.arithmetics
+import decimo.bigint.exponential
+from decimo.tests import (
+    BenchCase,
+    load_bench_cases,
+    load_bench_iterations,
+)
+from std.benchmark import keep
+from std.python import Python
+from std.sys import argv as sys_argv
+from std.time import perf_counter_ns
+
+
+def _now_stamp() raises -> String:
+    var dt = Python.import_module("datetime")
+    var now = dt.datetime.now(dt.timezone.utc)
+    return String(now.strftime("%Y%m%d_%H%M%S"))
+
+
+def _csv_quote(s: String) -> String:
+    var needs = False
+    for ch in s.codepoint_slices():
+        if ch == "," or ch == '"' or ch == "\n" or ch == "\r":
+            needs = True
+            break
+    if not needs:
+        return s
+    var out = String('"')
+    for ch in s.codepoint_slices():
+        if ch == '"':
+            out += '""'
+        else:
+            out += String(ch)
+    out += '"'
+    return out
+
+
+def _result_for(
+    op: String,
+    read a: BigInt,
+    read b: BigInt,
+    a_str: String,
+    b_int: Int,
+) raises -> String:
+    """Display path: produce the result string ONCE per case.
+
+    Never call this inside a timing loop — use `_time_kernel` instead.
+    """
+    if op == "add":
+        return String(a + b)
+    if op == "multiply":
+        return String(a * b)
+    if op == "floor_divide":
+        return String(a // b)
+    if op == "power":
+        return String(a**b_int)
+    if op == "shift":
+        return String(a << b_int)
+    if op == "sqrt":
+        return String(a.sqrt())
+    if op == "from_string":
+        return String(BigInt(a_str))
+    if op == "to_string":
+        return String(a)
+    raise Error("unknown op: " + op)
+
+
+def _time_kernel(
+    op: String,
+    read a: BigInt,
+    read b: BigInt,
+    a_str: String,
+    b_int: Int,
+) raises:
+    """Pure-numeric kernel for the timing loop.
+
+    Performs the same operation as `_result_for` but skips the decimal
+    rendering of the result: only `to_string` builds a String (rendering
+    IS the operation under measurement), and `from_string` parses `a_str`.
+    Every op then passes a small derivative of its result (`len(words)`
+    and `sign`, or `byte_length()` for `to_string`) to `keep(...)` to
+    prevent dead-code elimination at negligible cost versus the op.
+
+    Operands `a` / `b` are taken as `read` (borrowed) so no per-iter
+    deep copy of the heap-backed word list occurs.
+    """
+    if op == "add":
+        var r = a + b
+        keep(len(r.words))
+        keep(r.sign)
+        return
+    if op == "multiply":
+        var r = a * b
+        keep(len(r.words))
+        keep(r.sign)
+        return
+    if op == "floor_divide":
+        var r = a // b
+        keep(len(r.words))
+        keep(r.sign)
+        return
+    if op == "power":
+        var r = a**b_int
+        keep(len(r.words))
+        keep(r.sign)
+        return
+    if op == "shift":
+        var r = a << b_int
+        keep(len(r.words))
+        keep(r.sign)
+        return
+    if op == "sqrt":
+        var r = a.sqrt()
+        keep(len(r.words))
+        keep(r.sign)
+        return
+    if op == "from_string":
+        var v = BigInt(a_str)
+        keep(len(v.words))
+        keep(v.sign)
+        return
+    if op == "to_string":
+        var s = String(a)
+        keep(s.byte_length())
+        return
+    raise Error("unknown op: " + op)
+
+
+# Auto-tune iters: target ~50ms per timed run.
+# Includes a resolution floor (≥100µs total per rep) so cheap ops don't
+# collapse to <1 timer-tick and report 0 ns/iter. Returns (iters, reps):
+# reps shrinks to 1 for very-slow ops to bound wall time per case at ~500ms.
+def _tune_iters(initial_ns: UInt, hint_iters: Int) -> Tuple[Int, Int]:
+    comptime TARGET_NS: UInt = 50_000_000  # 50ms per rep target
+    comptime MIN_RES_NS: UInt = 100_000  # 100µs floor for resolution
+    comptime MAX_WALL_NS: UInt = 500_000_000  # 500ms total per case
+    var cal = initial_ns if initial_ns > 0 else UInt(1)
+    var n = Int(TARGET_NS // cal)
+    var n_min_res = Int(MIN_RES_NS // cal)
+    if n < n_min_res:
+        n = n_min_res
+    if n < 3:
+        n = 3
+    if n > hint_iters:
+        n = hint_iters
+    if n < 1:
+        n = 1
+    var per_rep = UInt(n) * cal
+    var reps = 3
+    if per_rep > 0:
+        var r = Int(MAX_WALL_NS // per_rep)
+        if r < 1:
+            r = 1
+        if r > 3:
+            r = 3
+        reps = r
+    return Tuple[Int, Int](n, reps)
+
+
+def _bench_case(
+    op: String,
+    bc: BenchCase,
+    iter_hint: Int,
+) raises -> Tuple[String, Float64]:
+    """Compute result + best-of-N ns/iter (auto-tuned)."""
+    # Build operands once.
+    var a = BigInt(bc.a)
+
+    # `b` handling per op:
+    #   - power / shift: b encodes the (small) integer exponent / shift count
+    #   - sqrt / from_string / to_string: unary, no b
+    #   - add / multiply / floor_divide: b is the second BigInt operand
+    var b: BigInt
+    var b_int: Int = 0
+    if op == "power" or op == "shift":
+        b = BigInt.from_int(0)
+        b_int = Int(BigInt(bc.b))
+    elif op == "sqrt" or op == "from_string" or op == "to_string" or bc.b == "":
+        b = BigInt.from_int(0)
+    else:
+        b = BigInt(bc.b)
+
+    # Compute the displayed `result` ONCE per case (outside any timing loop).
+    var result = _result_for(op, a, b, bc.a, b_int)
+
+    # Calibration: time 1 rep to estimate per-iter cost.
+    var cal_iters: Int = 1
+    var t0 = perf_counter_ns()
+    for _ in range(cal_iters):
+        _time_kernel(op, a, b, bc.a, b_int)
+    var cal_ns = UInt(perf_counter_ns() - t0)
+    var tuned = _tune_iters(cal_ns, iter_hint)
+    var iters = tuned[0]
+    var reps = tuned[1]
+
+    # Best-of-N timing (N = reps, adaptive).
+    var best: Int = 0x7FFF_FFFF_FFFF_FFFF
+    for _ in range(reps):
+        var t1 = perf_counter_ns()
+        for _ in range(iters):
+            _time_kernel(op, a, b, bc.a, b_int)
+        var dt = Int(perf_counter_ns() - t1)
+        if dt < best:
+            best = dt
+    return Tuple[String, Float64](result, Float64(best) / Float64(iters))
+
+
+def _pad(s: String, w: Int) -> String:
+    if s.byte_length() >= w:
+        return s
+    var out = s
+    for _ in range(w - s.byte_length()):
+        out += " "
+    return out
+
+
+def main() raises:
+    var pysys = Python.import_module("sys")
+    pysys.set_int_max_str_digits(10000000)
+
+    var argv = sys_argv()
+    var op = String("add")
+    var cases_dir = String("../cases")
+    var logs_dir = String("../logs")
+    var i = 1
+    while i < len(argv):
+        var arg = String(argv[i])
+        if arg == "--op":
+            op = String(argv[i + 1])
+            i += 2
+        elif arg == "--cases-dir":
+            cases_dir = String(argv[i + 1])
+            i += 2
+        elif arg == "--logs-dir":
+            logs_dir = String(argv[i + 1])
+            i += 2
+        else:
+            i += 1
+
+    var toml_path = cases_dir + "/" + op + ".toml"
+    var iter_hint = load_bench_iterations(toml_path)
+    var cases = load_bench_cases(toml_path)
+
+    var os_mod = Python.import_module("os")
+    if not os_mod.path.exists(logs_dir):
+        os_mod.makedirs(logs_dir)
+    var ts = _now_stamp()
+    var log_path = logs_dir + "/mojo_" + op + "_" + ts + ".csv"
+    var py = Python.import_module("builtins")
+    var log = py.open(log_path, "w")
+    log.write("timestamp,language,op,case_name,result,ns_per_iter\n")
+
+    print("# decimo.BigInt", op, "(hint=", iter_hint, ")")
+    print(_pad("case", 44), _pad("result", 36), "ns/iter")
+    for ref bc in cases:
+        var pair = _bench_case(op, bc, iter_hint)
+        var result = pair[0]
+        var per = pair[1]
+        var rs = result if result.byte_length() <= 34 else String(
+            result[byte=0:34]
+        )
+        print(_pad(bc.name, 44), _pad(rs, 36), per)
+        log.write(
+            ts
+            + ",mojo,"
+            + op
+            + ","
+            + _csv_quote(bc.name)
+            + ","
+            + _csv_quote(result)
+            + ","
+            + String(per)
+            + "\n"
+        )
+    log.flush()
+    log.close()
+    print("wrote", log_path)
diff --git a/benches/bigint/python/bench.py b/benches/bigint/python/bench.py
new file mode 100644
index 00000000..f614b4ca
--- /dev/null
+++ b/benches/bigint/python/bench.py
@@ -0,0 +1,199 @@
+#!/usr/bin/env python3
+"""Cross-language BigInt benchmark — Python (int) side.
+
+Reads cases/<op>.toml, expands `{C,N}` patterns, auto-tunes iteration count
+to ~50ms per case, and writes one CSV row per case to logs/python_<op>_<ts>.csv:
+
+    timestamp,language,op,case_name,result,ns_per_iter
+
+Python's arbitrary-precision `int` is the **oracle**: the aggregator marks
+the `match` column as OK iff every other language's result string equals
+Python's. BigInt arithmetic is exact, so there is no precision parameter.
+
+Usage:
+    python3 bench.py --op multiply --cases-dir ../cases --logs-dir ../logs
+"""
+
+from __future__ import annotations
+
+import argparse
+import csv
+import math
+import os
+import re
+import sys
+import time
+from datetime import datetime, timezone
+
+try:
+    import tomllib  # py 3.11+
+except ImportError:
+    import tomli as tomllib  # type: ignore
+
+# BigInt cases reach tens of thousands of decimal digits; lift CPython's
+# int<->str conversion guard (default 4300) so from_string / to_string work.
+sys.set_int_max_str_digits(10_000_000)
+
+PATTERN_RE = re.compile(r"\{([^{}]*),(\d+)\}")
+
+
+def expand(s: str) -> str:
+    """Expand every `{C,N}` group into `C` repeated `N` times.
+
+    `N` is whatever follows the LAST comma inside the braces (matches
+    Mojo). A group with no comma or a count `int()` rejects is copied
+    through unchanged, and so is a `{` that is never closed.
+    """
+    pieces = []
+    pos = 0
+    end = len(s)
+    while pos < end:
+        ch = s[pos]
+        # Only a `{` can open a group; look for the brace that closes it.
+        stop = s.find("}", pos + 1) if ch == "{" else -1
+        if stop < 0:
+            # Ordinary character, or an unmatched `{`: emit it as-is.
+            pieces.append(ch)
+            pos += 1
+            continue
+        # Split on the last comma: payload on the left, count on the right.
+        body, sep, count = s[pos + 1 : stop].rpartition(",")
+        chunk = s[pos : stop + 1]  # fallback: the group, untouched
+        if sep:
+            try:
+                chunk = body * int(count)
+            except ValueError:
+                pass
+        pieces.append(chunk)
+        pos = stop + 1
+    return "".join(pieces)
+
+
+# ----- per-op kernels ---------------------------------------------------
+
+
+def make_kernel(op: str, a: str, b: str):
+    """Build the zero-argument timing kernel for `op` and its display string.
+
+    Returns `(result_str, kernel)`; `kernel()` redoes the operation.
+    """
+    lhs = int(a)
+    # `b` is the right-hand operand (add / multiply / floor_divide) or the
+    # small exponent / shift count (power / shift); unary ops never read it.
+    rhs = 0 if b in ("", None) else int(b)
+
+    kernels = {
+        "add": lambda: lhs + rhs,
+        "multiply": lambda: lhs * rhs,
+        "floor_divide": lambda: lhs // rhs,
+        "power": lambda: lhs**rhs,
+        "shift": lambda: lhs << rhs,
+        "sqrt": lambda: math.isqrt(lhs),
+        "from_string": lambda: int(a),
+        "to_string": lambda: str(lhs),
+    }
+    if op not in kernels:
+        raise ValueError(f"unknown op: {op}")
+    kernel = kernels[op]
+    return str(kernel()), kernel
+
+
+# ----- timing -----------------------------------------------------------
+
+TARGET_NS = 50_000_000
+MIN_RES_NS = 100_000  # 100µs floor per rep for resolution
+MAX_WALL_NS = 500_000_000  # 500ms total wall per case
+
+
+def bench_kernel(kernel, iter_hint: int) -> float:
+    """Time `kernel` and return the best observed ns per call.
+
+    Follows the Mojo harness: one calibration call sizes the inner loop
+    for ~50ms per rep (never below the 100µs resolution floor, never
+    above `iter_hint`), and the number of reps drops from 3 towards 1 so
+    a slow case is bounded at ~500ms of wall time.
+
+    Returns the fastest rep's duration divided by the loop size.
+    """
+    # Calibration: a single call, clamped to 1ns so the divisions below
+    # never see a zero.
+    start = time.perf_counter_ns()
+    sink = kernel()
+    cal = max(time.perf_counter_ns() - start, 1)
+
+    # Loop size: the larger of the 50ms target and the resolution floor,
+    # at least 3, then capped by `iter_hint` and floored at 1.
+    wanted = max(TARGET_NS // cal, MIN_RES_NS // cal, 3)
+    iters = int(max(min(wanted, iter_hint), 1))
+
+    # Rep count: as many reps as fit in the wall budget, within 1..3.
+    per_rep = iters * cal
+    reps = 3
+    if per_rep > 0:
+        reps = max(1, min(3, MAX_WALL_NS // per_rep))
+
+    fastest = 1 << 62
+    for _ in range(int(reps)):
+        start = time.perf_counter_ns()
+        for _ in range(iters):
+            sink = kernel()
+        elapsed = time.perf_counter_ns() - start
+        fastest = min(fastest, elapsed)
+    # Bind the last result once more; this happens outside the timed region.
+    _ = sink
+    return fastest / iters
+
+
+# ----- main -------------------------------------------------------------
+
+
+def main() -> int:
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--op", required=True)
+    ap.add_argument("--cases-dir", default="../cases")
+    ap.add_argument("--logs-dir", default="../logs")
+    args = ap.parse_args()
+
+    toml_path = os.path.join(args.cases_dir, f"{args.op}.toml")
+    with open(toml_path, "rb") as f:
+        doc = tomllib.load(f)
+    cfg = doc.get("config", {})
+    iter_hint = int(cfg.get("iterations", 1000))
+    cases = doc.get("cases", [])
+
+    os.makedirs(args.logs_dir, exist_ok=True)
+    ts = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
+    log_path = os.path.join(args.logs_dir, f"python_{args.op}_{ts}.csv")
+    print(f"# python int {args.op} (hint={iter_hint})")
+    print(f"{'case':<44}{'result':<36}ns/iter")
+    with open(log_path, "w", newline="") as fout:
+        w = csv.writer(fout, lineterminator="\n")
+        w.writerow(
+            [
+                "timestamp",
+                "language",
+                "op",
+                "case_name",
+                "result",
+                "ns_per_iter",
+            ]
+        )
+        for c in cases:
+            name = c["name"]
+            a = expand(c["a"])
+            b = expand(c.get("b", "")) if c.get("b") not in (None, "") else ""
+            try:
+                result, kernel = make_kernel(args.op, a, b)
+                per_ns = bench_kernel(kernel, iter_hint)
+            except Exception as exc:
+                result = f"ERR: {exc.__class__.__name__}: {exc}"
+                per_ns = 0.0
+            short = result if len(result) <= 34 else result[:34]
+            print(f"{name:<44}{short:<36}{per_ns:.2f}")
+            w.writerow([ts, "python", args.op, name, result, f"{per_ns:.4f}"])
+    print(f"wrote {log_path}", file=sys.stderr)
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/benches/bigint/run_all.sh b/benches/bigint/run_all.sh
new file mode 100755
index 00000000..1dc865ea
--- /dev/null
+++ b/benches/bigint/run_all.sh
@@ -0,0 +1,105 @@
+#!/usr/bin/env bash
+# Run BigInt benchmarks for mojo (decimo) / python / rust, then write a
+# timestamped markdown report under reports/.
+#
+# Raw per-language CSV logs land in   logs/{lang}_{op}_{ts}.csv
+# Aggregated markdown report lands in reports/bigint_report_utc_{ts}.md
+#
+# BigInt is exact: there is NO precision parameter.
+#
+# Usage:
+#   ./run_all.sh                          # all default ops
+#   ./run_all.sh --ops multiply power     # subset of ops
+#
+# Default ops: add multiply floor_divide power shift sqrt from_string to_string
+set -uo pipefail
+
+cd "$(dirname "$0")"
+
+OPS=()
+while [ $# -gt 0 ]; do
+  case "$1" in
+    --ops) shift ;;
+    --)    shift; break ;;
+    -*)    echo "Unknown flag: $1" >&2; exit 2 ;;
+    *)     OPS+=("$1"); shift ;;
+  esac
+done
+
+if [ ${#OPS[@]} -eq 0 ]; then
+  OPS=(add multiply floor_divide power shift sqrt from_string to_string)
+fi
+
+mkdir -p logs reports
+
+# Always purge any prior *.csv so an old run's data does not leak into the
+# aggregated report.
+rm -f logs/*.csv 2>/dev/null || true
+
+PIXI_RUN="pixi run --manifest-path ../../../pixi.toml"
+PIXI_RUN_TOP="pixi run --manifest-path ../../pixi.toml"
+
+# ---- Build Mojo harness ----
+HAVE_MOJO=0
+echo ">>> Building Mojo harness (release: -O3, no debug, no asserts)..."
+if (cd mojo && $PIXI_RUN mojo build \
+     -I ../../../src -O3 -g0 -D ASSERT=none ./bench.mojo -o ./bench); then
+  HAVE_MOJO=1
+else
+  echo "!!! Mojo build failed; skipping mojo harness."
+fi
+
+# ---- Build Rust harness ----
+HAVE_RUST=0
+if command -v cargo >/dev/null 2>&1; then
+  echo ">>> Building Rust harness (release)..."
+  if (cd rust && cargo build --release --quiet); then
+    HAVE_RUST=1
+    RUST_BIN="$(pwd)/rust/target/release/bench"
+  else
+    echo "!!! Rust build failed; skipping rust harness."
+  fi
+else
+  echo ">>> cargo not found; skipping rust harness."
+fi
+
+# ---- Check Python (needs tomllib OR tomli) ----
+HAVE_PY=0
+if $PIXI_RUN_TOP python3 -c 'import tomllib' 2>/dev/null \
+   || $PIXI_RUN_TOP python3 -c 'import tomli' 2>/dev/null; then
+  HAVE_PY=1
+else
+  echo ">>> python (with tomllib >=3.11 or tomli) not available; skipping python harness."
+fi
+
+run_step() {
+  local label="$1"; shift
+  echo "--- $label ---"
+  "$@" || echo "!!! $label failed (continuing)"
+}
+
+for op in "${OPS[@]}"; do
+  echo
+  echo "===== $op ====="
+
+  if [ "$HAVE_MOJO" = "1" ]; then
+    run_step "decimo (mojo)" \
+      bash -c "cd mojo && $PIXI_RUN ./bench --op '$op' --cases-dir ../cases --logs-dir ../logs"
+  fi
+  if [ "$HAVE_PY" = "1" ]; then
+    run_step "python int" \
+      bash -c "cd python && $PIXI_RUN python3 bench.py --op '$op' --cases-dir ../cases --logs-dir ../logs"
+  fi
+  if [ "$HAVE_RUST" = "1" ]; then
+    run_step "rust num-bigint" \
+      "$RUST_BIN" --op "$op" --cases-dir "$(pwd)/cases" --logs-dir "$(pwd)/logs"
+  fi
+done
+
+echo
+echo ">>> Aggregating into report..."
+$PIXI_RUN_TOP python3 ./aggregate.py \
+    --logs-dir logs \
+    --reports-dir reports \
+    --langs mojo python rust \
+    --ops "${OPS[@]}"
diff --git a/benches/bigint/rust/.gitignore b/benches/bigint/rust/.gitignore
new file mode 100644
index 00000000..ea8c4bf7
--- /dev/null
+++ b/benches/bigint/rust/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/benches/bigint/rust/Cargo.toml b/benches/bigint/rust/Cargo.toml
new file mode 100644
index 00000000..3d08b74f
--- /dev/null
+++ b/benches/bigint/rust/Cargo.toml
@@ -0,0 +1,17 @@
+[package]
+edition = "2021"
+name = "bench"
+version = "0.1.0"
+
+[dependencies]
+num-bigint = "0.4"
+num-integer = "0.1"
+num-traits = "0.2"
+serde = {version = "1", features = ["derive"]}
+toml = "0.8"
+
+[profile.release]
+codegen-units = 1
+debug = 1
+lto = "fat"
+opt-level = 3
diff --git a/benches/bigint/rust/src/main.rs b/benches/bigint/rust/src/main.rs
new file mode 100644
index 00000000..4ebf0646
--- /dev/null
+++ b/benches/bigint/rust/src/main.rs
@@ -0,0 +1,312 @@
+// Cross-language BigInt benchmark — Rust side (num-bigint).
+//
+// Reads cases/<op>.toml (shared with the Mojo / Python sides), expands
+// `{C,N}` repeat patterns, auto-tunes the iteration count to ~50ms per
+// case, prints a summary table to stdout, and writes one CSV record per
+// case to `logs/rust_<op>_<ts>.csv`. Schema:
+//
+//     timestamp,language,op,case_name,result,ns_per_iter
+//
+// BigInt arithmetic is exact, so there is no precision parameter.
+//
+// Usage:  cargo run --release --quiet -- --op add [--cases-dir DIR]
+//                                                  [--logs-dir DIR]
+//
+// Available ops: add, multiply, floor_divide, power, shift, sqrt,
+//                from_string, to_string.
+
+use num_bigint::BigInt;
+use num_integer::Integer;
+use num_traits::Pow;
+use serde::Deserialize;
+use std::env;
+use std::fs;
+use std::hint::black_box;
+use std::io::Write;
+use std::path::PathBuf;
+use std::str::FromStr;
+use std::time::Instant;
+
+#[derive(Debug, Deserialize)]
+struct Doc {
+    config: Option<Config>,
+    cases: Vec<Case>,
+}
+
+#[derive(Debug, Deserialize)]
+struct Config {
+    iterations: Option<u64>,
+}
+
+#[derive(Debug, Deserialize, Clone)]
+struct Case {
+    name: String,
+    a: String,
+    #[serde(default)]
+    b: String,
+}
+
+/// Expand `{C,N}` repeat patterns. `{9,3}` → "999", `1{0,4}2` → "100002".
+/// Anything that is not a well-formed group is copied through unchanged.
+fn expand(s: &str) -> String {
+    let mut out = String::new();
+    let mut i = 0;
+    while i < s.len() {
+        if s.as_bytes()[i] == b'{' {
+            if let Some(close_rel) = s[i + 1..].find('}') {
+                let close = i + 1 + close_rel;
+                let inner = &s[i + 1..close];
+                if let Some(comma) = inner.rfind(',') {
+                    let payload = &inner[..comma];
+                    if let Ok(n) = inner[comma + 1..].parse::<usize>() {
+                        out.push_str(&payload.repeat(n));
+                        i = close + 1;
+                        continue;
+                    }
+                }
+            }
+        }
+        // Copy a whole char: casting a single byte would mangle multi-byte UTF-8.
+        let ch = s[i..].chars().next().expect("i is on a char boundary");
+        out.push(ch);
+        i += ch.len_utf8();
+    }
+    out
+}
+
+// ----- timing config (mirrors the Mojo / Python harnesses) --------------
+const TARGET_NS: u128 = 50_000_000; // 50ms per rep target
+const MIN_RES_NS: u128 = 100_000; // 100µs floor per rep for resolution
+const MAX_WALL_NS: u128 = 500_000_000; // 500ms total wall per case
+
+/// Run `iters` reps of `op` on the prepared operands. The result is fed to
+/// `black_box` so the optimizer cannot elide the work.
+fn run_iters(op: &str, da: &BigInt, db: &BigInt, exp: u32, shift: usize, a: &str, iters: u64) {
+    match op {
+        "add" => {
+            for _ in 0..iters {
+                black_box(black_box(da) + black_box(db));
+            }
+        }
+        "multiply" => {
+            for _ in 0..iters {
+                black_box(black_box(da) * black_box(db));
+            }
+        }
+        "floor_divide" => {
+            for _ in 0..iters {
+                black_box(black_box(da).div_floor(black_box(db)));
+            }
+        }
+        "power" => {
+            for _ in 0..iters {
+                black_box(Pow::pow(black_box(da), black_box(exp)));
+            }
+        }
+        "shift" => {
+            for _ in 0..iters {
+                black_box(black_box(da) << black_box(shift));
+            }
+        }
+        "sqrt" => {
+            for _ in 0..iters {
+                black_box(black_box(da).sqrt());
+            }
+        }
+        "from_string" => {
+            for _ in 0..iters {
+                black_box(BigInt::from_str(black_box(a)).expect("parse"));
+            }
+        }
+        "to_string" => {
+            for _ in 0..iters {
+                black_box(black_box(da).to_string());
+            }
+        }
+        other => panic!("unknown op {other}"),
+    }
+}
+
+fn compute_result(op: &str, da: &BigInt, db: &BigInt, exp: u32, shift: usize, a: &str) -> String {
+    match op {
+        "add" => (da + db).to_string(),
+        "multiply" => (da * db).to_string(),
+        "floor_divide" => da.div_floor(db).to_string(),
+        "power" => Pow::pow(da, exp).to_string(),
+        "shift" => (da << shift).to_string(),
+        "sqrt" => da.sqrt().to_string(),
+        "from_string" => BigInt::from_str(a).expect("parse").to_string(),
+        "to_string" => da.to_string(),
+        other => panic!("unknown op {other}"),
+    }
+}
+
+fn run_op(op: &str, a: &str, b: &str, iter_hint: u64) -> (String, f64) {
+    let da = BigInt::from_str(a).expect("from_str a");
+    // `b` is the second operand (add/multiply/floor_divide) or a small
+    // integer (power exponent / shift count). Unary ops leave it empty.
+    // Fail fast on a malformed non-empty operand, consistent with `a`, so
+    // bad case data cannot masquerade as a valid benchmark run.
+    let db = if b.is_empty() {
+        BigInt::from(0)
+    } else {
+        BigInt::from_str(b).expect("from_str b")
+    };
+    let exp: u32 = if op == "power" {
+        b.parse::<u32>().expect("power exponent")
+    } else {
+        0
+    };
+    let shift: usize = if op == "shift" {
+        b.parse::<usize>().expect("shift count")
+    } else {
+        0
+    };
+
+    // Displayed result, computed once (string form for cross-lang diff).
+    let result_str = compute_result(op, &da, &db, exp, shift, a);
+
+    // Calibrate one rep to estimate per-iter cost.
+    let t0 = Instant::now();
+    run_iters(op, &da, &db, exp, shift, a, 1);
+    let mut cal = t0.elapsed().as_nanos();
+    if cal == 0 {
+        cal = 1;
+    }
+    let mut n = TARGET_NS / cal;
+    let n_min_res = MIN_RES_NS / cal;
+    if n < n_min_res {
+        n = n_min_res;
+    }
+    if n < 3 {
+        n = 3;
+    }
+    if n > iter_hint as u128 {
+        n = iter_hint as u128;
+    }
+    if n < 1 {
+        n = 1;
+    }
+    let iters = n as u64;
+    let per_rep = (iters as u128) * cal;
+    let reps: u32 = if per_rep > 0 {
+        (MAX_WALL_NS / per_rep).clamp(1, 3) as u32
+    } else {
+        3
+    };
+
+    // Best-of-N timing.
+    let mut best = u128::MAX;
+    for _ in 0..reps {
+        let t0 = Instant::now();
+        run_iters(op, &da, &db, exp, shift, a, iters);
+        let dt = t0.elapsed().as_nanos();
+        if dt < best {
+            best = dt;
+        }
+    }
+    let per = best as f64 / iters as f64;
+    (result_str, per)
+}
+
+/// Entry point: parse the flags, benchmark every case of the selected op,
+/// print a summary table to stdout and write one CSV row per case to the
+/// log. A flag with no value or a failed log write aborts with a message.
+fn main() {
+    let mut op = String::from("add");
+    let mut cases_dir = PathBuf::from("../cases");
+    let mut logs_dir = PathBuf::from("../logs");
+    let args: Vec<String> = env::args().collect();
+    let mut i = 1;
+    while i < args.len() {
+        let flag = args[i].as_str();
+        if !matches!(flag, "--op" | "--cases-dir" | "--logs-dir") {
+            i += 1; // unknown arguments are skipped
+            continue;
+        }
+        let value = args.get(i + 1).unwrap_or_else(|| panic!("{flag} requires a value"));
+        match flag {
+            "--op" => op = value.clone(),
+            "--cases-dir" => cases_dir = PathBuf::from(value),
+            _ => logs_dir = PathBuf::from(value),
+        }
+        i += 2;
+    }
+
+    let toml_path = cases_dir.join(format!("{op}.toml"));
+    let raw = fs::read_to_string(&toml_path)
+        .unwrap_or_else(|e| panic!("read {}: {e}", toml_path.display()));
+    let doc: Doc = toml::from_str(&raw).expect("toml parse");
+    let iter_hint = doc
+        .config
+        .as_ref()
+        .and_then(|c| c.iterations)
+        .unwrap_or(1000);
+
+    fs::create_dir_all(&logs_dir).expect("create logs dir");
+    let ts = chrono_now();
+    let log_path = logs_dir.join(format!("rust_{op}_{ts}.csv"));
+    let mut log = fs::File::create(&log_path).expect("open log");
+    writeln!(log, "timestamp,language,op,case_name,result,ns_per_iter").expect("write log");
+
+    println!("# num-bigint {} (hint={})", op, iter_hint);
+    println!("{:<40} {:<32} {:>12}", "case", "result", "ns/iter");
+    for case in &doc.cases {
+        let a = expand(&case.a);
+        let b = expand(&case.b);
+        let (result, per) = run_op(&op, &a, &b, iter_hint);
+        let result_short: String = result.chars().take(30).collect();
+        println!("{:<40} {:<32} {:>12.3}", case.name, result_short, per);
+        writeln!(
+            log,
+            "{},rust,{},{},{},{:.4}",
+            ts,
+            op,
+            csv_quote(&case.name),
+            csv_quote(&result),
+            per
+        )
+        .expect("write log");
+    }
+    eprintln!("wrote {}", log_path.display());
+}
+
+fn csv_quote(s: &str) -> String {
+    if s.contains(',') || s.contains('"') || s.contains('\n') || s.contains('\r') {
+        format!("\"{}\"", s.replace('"', "\"\""))
+    } else {
+        s.to_string()
+    }
+}
+
+/// Tiny timestamp without pulling in chrono. Format: YYYYMMDD_HHMMSS (UTC).
+fn chrono_now() -> String {
+    use std::time::SystemTime;
+    let secs = SystemTime::now()
+        .duration_since(SystemTime::UNIX_EPOCH)
+        .unwrap()
+        .as_secs();
+    let (y, m, d, hh, mm, ss) = unix_to_ymd_hms(secs);
+    format!("{:04}{:02}{:02}_{:02}{:02}{:02}", y, m, d, hh, mm, ss)
+}
+
+fn unix_to_ymd_hms(secs: u64) -> (u32, u32, u32, u32, u32, u32) {
+    let days = secs / 86400;
+    let rem = secs % 86400;
+    let hh = (rem / 3600) as u32;
+    let mm = ((rem % 3600) / 60) as u32;
+    let ss = (rem % 60) as u32;
+
+    // Civil-from-days algorithm (Howard Hinnant).
+    let z = days as i64 + 719468;
+    let era = (if z >= 0 { z } else { z - 146096 }) / 146097;
+    let doe = (z - era * 146097) as u64;
+    let yoe = (doe - doe / 1460 + doe / 36524 - doe / 146096) / 365;
+    let y = yoe as i64 + era * 400;
+    let doy = doe - (365 * yoe + yoe / 4 - yoe / 100);
+    let mp = (5 * doy + 2) / 153;
+    let d = doy - (153 * mp + 2) / 5 + 1;
+    let m = if mp < 10 { mp + 3 } else { mp - 9 };
+    let y = y + (if m <= 2 { 1 } else { 0 });
+    (y as u32, m as u32, d as u32, hh, mm, ss)
+}
diff --git a/benches/decimal128/aggregate.py b/benches/decimal128/aggregate.py
index a2518bbf..a0a1c38d 100644
--- a/benches/decimal128/aggregate.py
+++ b/benches/decimal128/aggregate.py
@@ -8,7 +8,7 @@
     2. Per-op detail tables (rows = test cases, columns = languages)
     3. Result-equivalence summary (per-op match rate)
 
-Output goes to <reports-dir>/dec128_report_{ts}.md by default.
+Output goes to <reports-dir>/dec128_report_utc_{ts}.md by default.
 
 Usage:
     python3 aggregate.py --logs-dir logs --reports-dir reports \\
@@ -199,7 +199,7 @@ def main() -> int:
     ap.add_argument(
         "--out",
         default=None,
-        help="Override report path (default: <reports-dir>/dec128_report_<ts>.md)",
+        help="Override report path (default: <reports-dir>/dec128_report_utc_<ts>.md)",
     )
     args = ap.parse_args()
 
@@ -218,7 +218,7 @@ def main() -> int:
     else:
         offset_str = "UTC"
     header_ts = f"{now_local.strftime('%Y-%m-%d %H:%M:%S')} ({offset_str})"
-    out_path = args.out or os.path.join(args.reports_dir, f"dec128_report_{ts}.md")
+    out_path = args.out or os.path.join(args.reports_dir, f"dec128_report_utc_{ts}.md")
 
     lang_label = {
         "mojo": "decimo",
diff --git a/benches/run_bench.sh b/benches/run_bench.sh
index c7b0ddb1..edba8bf3 100755
--- a/benches/run_bench.sh
+++ b/benches/run_bench.sh
@@ -22,7 +22,7 @@ if [[ -z "$TYPE" ]]; then
     echo "Usage: pixi run bench <type> [operation]"
     echo ""
     echo "Types:"
-    echo "  bigint   (int)        BigInt benchmarks (BigInt10 vs BigInt vs Python int)"
+    echo "  bigint   (int)        BigInt benchmarks (decimo vs Python int vs Rust num-bigint)"
     echo "  biguint  (uint)       BigUInt benchmarks (BigUInt vs Python int)"
     echo "  decimal128 (dec128)   Decimal128 benchmarks (Decimal128 vs Python decimal)"
     echo "  bigdecimal (dec)      BigDecimal benchmarks (BigDecimal vs Python decimal)"
@@ -91,6 +91,23 @@ if [[ "$TYPE" == "bigdecimal" ]]; then
     fi
 fi
 
+# --- bigint: cross-language pipeline (decimo + python + rust) ---
+# When OP is empty, run the full op set across all available languages and
+# produce a timestamped report under benches/bigint/reports/. The Rust
+# (num-bigint) harness is built and run only if `cargo` is on PATH;
+# otherwise the pipeline runs with the available languages. When OP is
+# given, restrict the run to that single op via --ops. Extra args after OP
+# are forwarded verbatim to run_all.sh.
+if [[ "$TYPE" == "bigint" ]]; then
+    shift  # drop TYPE
+    if [[ -z "$OP" ]]; then
+        exec bash "$DIR/run_all.sh"
+    else
+        shift  # drop OP
+        exec bash "$DIR/run_all.sh" --ops "$OP" "$@"
+    fi
+fi
+
 # --- Interactive mode (no operation specified) ---
 if [[ -z "$OP" ]]; then
     cd "$DIR"
diff --git a/docs/plans/bigint_enhancement.md b/docs/plans/bigint_enhancement.md
new file mode 100644
index 00000000..3fbbd852
--- /dev/null
+++ b/docs/plans/bigint_enhancement.md
@@ -0,0 +1,300 @@
+# BigInt Enhancement Plan
+
+> **Date**: 2026-06-19 (created)
+> **Target**: decimo >=0.9.0
+> **Mojo Version**: >=v1.0.0
+> 子曰：工欲善其事，必先利其器。
+
+This document is the single source of truth for the arbitrary-precision
+**signed integer** (`decimo.BigInt`, base-2^32) performance & correctness
+effort. It supersedes `bigint2_benchmark_analysis.md` (2026-02-20), keeping
+only the still-relevant historical work items; the full predecessor is
+recoverable from git history.
+
+## 1. Cross-Language Snapshot
+
+Scope: **arbitrary-precision** signed integers. `BigInt10` (the legacy
+base-10^9 integer) and `BigUInt` are out of scope here.
+
+| Library           | Limb | Mul algorithm tier         | Div algorithm  | Sqrt                    |
+| ----------------- | ---- | -------------------------- | -------------- | ----------------------- |
+| decimo BigInt     | 2^32 | School → Kara              | Knuth-D → B-Z  | Newton → prec-doubling  |
+| Py `int` (C)      | 2^30 | School → Kara              | Knuth (school) | prec-doubling (`isqrt`) |
+| Rust `num-bigint` | 2^64 | School → Kara → Toom-3     | Knuth (school) | Newton                  |
+| Java `BigInteger` | 2^32 | School → Kara → Toom-3     | Knuth → B-Z    | Newton                  |
+| GMP `mpz_t`       | 2^64 | School → Kara → Toom → FFT | Newton-recip.  | Newton-reciprocal       |
+
+**Coverage.** `decimo.BigInt` already offers a complete integer API:
+`+ - * // % **`, `<< >>`, `& | ^ ~`, `sqrt` (integer), `from_string`,
+`to_string`, plus `gcd`/`extended_gcd`/`lcm`/`mod_pow`/`mod_inverse`
+(`number_theory.mojo`). No API gaps versus Python `int` or Rust
+`num-bigint`; the open work is purely performance (the hardest part :D).
+
+## 2. Baseline (authoritative, 2026-06-19)
+
+Cross-language harness `benches/bigint/` — `decimo.BigInt` vs Python `int`
+(oracle + timing) vs Rust `num-bigint`. Release build (`-O3 -g0 -D
+ASSERT=none`), DCE-guarded (`keep`/`black_box`), best-of-N auto-tuned to
+~50ms/case. Median ns/iter; `dm/py` = decimo÷python, `dm/rs` =
+decimo÷rust (**< 1.00 = decimo faster**). 230 cases, 100% decimo-vs-Python
+agreement.
+
+| op           |   dm |   py |   rs | dm/py | dm/rs | dominant cost                                 |
+| ------------ | ---: | ---: | ---: | ----: | ----: | --------------------------------------------- |
+| add          |   48 |   31 |   41 |  1.5× |  1.2× | small-operand constant overhead               |
+| multiply     |   50 |   42 |   31 |  1.2× |  1.6× | no Toom-3, no SIMD partial products vs Rust   |
+| floor_divide |  230 |   59 |   41 |  3.9× |  5.6× | **per-call allocations (the worst op)**       |
+| power        |  340 |  160 |  297 |  2.1× |  1.2× | square-and-multiply per-op overhead           |
+| shift        |   42 |   39 |   26 |  1.1× |  1.6× | result-buffer allocation                      |
+| sqrt         |  580 |  373 |  564 |  1.5× |  1.0× | medium-size division overhead                 |
+| from_string  |  990 |  786 |  407 |  1.3× |  2.4× | base-10 → base-2^32 conversion (O(n²) medium) |
+| to_string    |  893 |  266 |  350 |  3.4× |  2.6× | O(n²) repeated `/10^9` at 50–1000 digits      |
+
+> **Methodology note.** These numbers replace the 2026-02-20 per-op
+> figures, which reported decimo *faster* than Python. That harness threw
+> the result away (`_ = a + b`) with no DCE guard, timed a single pass, and
+> ran on different hardware, so its ratios are not comparable. Treat the
+> 2026-06-19 figures as the baseline. They fluctuate ±10–20% run to run.
+
+## 3. Change History — Done
+
+Condensed from `bigint2_benchmark_analysis.md` (v0.8.0 effort). All items
+verified and merged; kept here as the algorithmic record.
+
+| Tag  | Item                                                                                             |
+| ---- | ------------------------------------------------------------------------------------------------ |
+| PR0  | sqrt correctness: overestimate-seeded Newton + CPython precision-doubling (was wrong ≥1000 d)    |
+| PR1  | Karatsuba multiply (`CUTOFF_KARATSUBA = 48` words); slice-based, offset assembly, ptr loops      |
+| PR2  | Slice-based Burnikel-Ziegler divide (`CUTOFF_BURNIKEL_ZIEGLER = 64`); ≤4-word divisor fast paths |
+| PR3  | Divide-and-conquer `to_string` base conversion (entry ≥128 words, leverages B-Z)                 |
+| PR4a | SIMD `parse_numeric_string` (two-pass, `vectorize[16]` digit extraction)                         |
+| PR4b | D&C `from_string` base conversion (entry > 10000 digits)                                         |
+| PR4c | `from_string` micro-opts (≤9/≤19-digit fast paths, pre-alloc, raw ptrs, balanced split)          |
+| PR4d | `to_string` micro-opts (1-/2-word fast paths, `InlineArray` byte buffer, raw ptrs)               |
+| PR5  | True in-place arithmetic for all 11 `__i*__` dunders (`add_inplace`, …)                          |
+| PR6  | Bitwise AND / OR / XOR / NOT with two's-complement semantics                                     |
+| PR7  | `gcd`, `extended_gcd`, `lcm`, `mod_pow`, `mod_inverse` (`number_theory.mojo`)                    |
+| PR8  | `BInt`/`BigInt` alias bound to the base-2^32 type (legacy → `BigInt10`)                          |
+
+## 4. Lessons Learnt
+
+Items 1–3 are BigInt-specific. Items 4–9 transfer from
+`bigdecimal_enhancement.md §4` / `decimal128_enhancement.md` — they hold
+unchanged for the variable-length signed case.
+
+1. **Newton sqrt must converge from above.** An underestimate seed lets
+   Newton settle on a wrong integer root at ≥1000 digits (PR0). Seed
+   with a ceiling-rounded hardware sqrt of the top words; for huge inputs use
+   CPython precision-doubling — total work O(M(n)), not O(M(n)·log n).
+
+2. **Base-2^32 carries are shift/mask, not division — so the BigDecimal
+   "deferred-carry / Comba" multiply win does NOT transfer.** The base-10^9
+   Comba trick (T-9) existed to amortise the `% 10^9` / `/ 10^9` on every
+   inner-product step. In base-2^32 the carry is already `>> 32` + `&
+   0xFFFFFFFF` (no divide), so the multiply gap versus Rust is **Toom-3 +
+   SIMD partial-product accumulation**, not carry amortisation. Measure
+   before porting any base-10^9 micro-opt.
+
+3. **Slice-based recursion is mandatory for B-Z.** The first copy-based
+   Burnikel-Ziegler regressed (excess `List[UInt32]` allocation per level);
+   passing `(list, start, end)` bounds and materialising only at the Knuth-D
+   base case is what made it a net win (PR2). Any new D&C kernel must follow
+   the same no-copy-until-base-case discipline.
+
+   3a. **A cross-language gap is usually decimo's own overhead, not the
+   limb width.** When I first saw the 5.6× Rust floor_divide gap I blamed
+   the representation (decimo is base-2^32, num-bigint is base-2^64). The
+   benchmark says otherwise. Python uses base-2^30 limbs, even narrower
+   than decimo's, and still divides 3.9× faster; sqrt is multiply- and
+   divide-heavy yet already at parity with Rust. So a wider limb is not why
+   decimo trails. Look for the real cost first: redundant copies, per-call
+   allocations, branches in the inner loop. Only reach for the limb width
+   once those are gone (T-W1).
+
+4. **`debug_assert` does NOT lazy-evaluate its message** under `-D
+   ASSERT=none`; a `String.format(...)` argument still allocates in the hot
+   loop. Use plain string literals (or the variadic `debug_assert(cond,
+   "msg ", value)` form) only.
+
+5. **Hot path first.** The count of branches *before* the fast arm matters
+   more than the fast arm's body. Route rare cases (zero operand, sign
+   mismatch, differing length) to a cold tail of the same function.
+
+6. **`@no_inline` the body of every `raise … .format(...)` helper** so
+   `@always_inline` can fire on the parent and icache pressure at inline
+   raise edges drops.
+
+7. **Hoist a raw data pointer in multi-buffer / O(n²) inner loops.** A
+   `List[i]` access reloads `List._data` every element. Hoisting
+   (`var p = lst.unsafe_ptr()`) is a stable win when an iteration touches
+   ≥2 buffers or is an O(n²) inner loop; it does **not** clear the ~3% bar
+   for single-buffer single-pass O(n) loops that are arithmetic-bound.
+   Safety: the buffer must not be resized while the pointer is live.
+
+8. **Precision doubling is the lever for Newton-style methods** (sqrt, and
+   any future reciprocal divide): start small and double, total work ≈ 3×
+   the final iteration instead of `log n` full-width iterations.
+
+9. **Reciprocal-Newton divide only wins once multiplication is much
+   cheaper than division** (the NTT regime). With Karatsuba, a B-Z divide is
+   ~2–3× a same-size multiply, so a reciprocal-Newton rewrite would be a
+   *regression* today — gate it behind Toom-3/NTT (T-M1).
+
+## 5. Open Items
+
+Worked in priority order. There is one real outlier, floor_divide; the
+rest are smaller. The limb-width question sits at the end, because the
+benchmark shows it is not why decimo trails today.
+
+### floor_divide / truncate_divide — the outlier (3.9× py, 5.6× rs)
+
+floor_divide is the only op that trails badly. add and multiply are within
+1.2–1.6×, but divide is 3.9× Python and 5.6× Rust. Two facts rule out the
+easy explanations:
+
+- It is not the algorithm. decimo and num-bigint both run Knuth Algorithm D
+  below the Burnikel-Ziegler cutoff (64 words).
+- It is not the limb width. Python's `int` uses base-2^30 limbs, narrower
+  than decimo's base-2^32, and still divides 3.9× faster. If limb width
+  were the cause, decimo would be ahead of Python, not behind.
+
+The cause is decimo's per-call overhead on small and medium operands. A
+small divide such as `-7 // 2` does three or four heap allocations in
+decimo and almost none in Python or Rust:
+
+- `floor_divide` copies the quotient and remainder out of the divmod tuple
+  with `.copy()`, two allocations it does not need, then allocates a third
+  time through `_add_magnitudes(q, 1)` on the negative-floor branch.
+- `_divmod_magnitudes` normalises both operands with `_shift_left_words` on
+  every multi-word call, two more allocations, even when the operands are
+  tiny.
+- The Knuth-D inner loop recomputes `len(u)` and re-checks `idx < len(u)`
+  on every step and takes a branchy manual borrow. num-bigint walks a slice
+  with a branchless offset-carry.
+
+**T-D1 — remove the redundant allocations.** Move the quotient and
+remainder out of the divmod tuple instead of copying them. Increment in
+place on the negative-floor branch. Fold the Knuth-D normalisation shift
+into the base case so it stops allocating two fresh buffers per call.
+
+**T-D2 — tighten the inner loop.** Hoist `len(u)` and the `u`/`v` data
+pointers out of the multiply-subtract loop (Lesson 7, two buffers) and
+replace the manual borrow with num-bigint's branchless offset-carry.
+
+**T-D3 — re-tune `CUTOFF_BURNIKEL_ZIEGLER`.** Re-measure 32/48/64 once the
+base case is cheaper. The 2n-by-n / 4n-by-n / 8n-by-n slowdown already
+noted for `BigUInt` in `todo.md` may share this root and should be checked
+together.
+
+**T-D4 — reciprocal / Barrett divide. Deferred.** Not worth it before
+Toom-3 (Lesson 9).
+
+### to_string, 50–1000 digits (3.4× py)
+
+The 1- and 2-word fast paths and the D&C path above 128 words are done. The
+50–1000-digit band still runs the O(n²) simple path of repeated division by
+10^9, with the `InlineArray` chunked emit already in place.
+
+**T-T1 — lower the D&C entry threshold** once divide is cheaper (D&C is
+gated on divide cost). Re-measure entry = 64 / 96.
+
+**T-T2 — wider radix per chunk.** Batching the repeated `/10^9` into a
+larger radix only helps if it avoids the software-emulated 128-bit divide.
+PR4d rejected 10^18 chunks for exactly this reason; re-verify on current
+hardware before trying again.
+
+### multiply (1.2× py, 1.6× rs)
+
+decimo stops at Karatsuba; num-bigint adds Toom-3, which is what makes it
+1.6× faster on large operands.
+
+**T-M1 — Toom-3 multiplication.** Add a Toom-3 tier above ~256 words, the
+same cutoff ratio used in BigDecimal / BigUInt.
+
+**T-M2 — SIMD partial-product accumulation** in the schoolbook base case
+(NEON 4×UInt32), which is the base for both Karatsuba and a future Toom-3.
+This is the base-2^32 analogue of the BigDecimal multiply win; note that
+the base-10^9 Comba trick itself does not transfer (Lesson 2).
+
+### power (2.1× py) and add (1.5× py)
+
+**T-P1 — power inner loop.** General (non-2^N) power pays for a fresh
+temporary on every multiply. Route the loop through `multiply_inplace` and
+add a dedicated `square()` that exploits symmetry, roughly half the partial
+products. The 2^N shift fast path is already excellent; Rust loses to it.
+
+**T-A1 — add/sub dispatch.** SIMD add is already in place, so the
+small-operand gap is dispatch, not the kernel. Put the same-length
+same-sign case first (Lesson 5) and check for any stray
+`debug_assert .format` (Lesson 4).
+
+### from_string and shift
+
+**T-F1 — from_string base conversion.** The 50–10000-digit band runs an
+O(n²) base-10 → base-2^32 conversion. Lower the D&C entry threshold once
+multiply is faster; the 20000-digit-and-up gap only closes with Toom-3
+(T-M1).
+
+**T-SH1 — shift allocation.** Extreme shifts such as `1 << 100000` are
+allocation-bound. Pre-size the result buffer with
+`resize(unsafe_uninit_length=…)` (O(1) capacity plus memset) instead of
+letting it grow.
+
+### A bigger bet: base-2^64 limbs (unproven, not the first lever)
+
+num-bigint stores base-2^64 limbs on 64-bit targets; decimo stores
+base-2^32. For the same number num-bigint holds half the limbs, so its
+schoolbook multiply and Knuth-D base case run over half the words. This is
+worth keeping in mind for the large-operand multiply and from_string cases.
+
+It is not why decimo trails today, and I want to be clear about that. sqrt
+is multiply- and divide-heavy yet already sits at parity with Rust (1.0×),
+and Python beats decimo at divide with even narrower limbs. So I treat a
+wider limb as a later, large bet, and only after the per-call overhead
+above is gone.
+
+Feasibility (probed 2026-06-19, Mojo v1.0.0b1). The Rust `cfg_digit!` idea
+ports. Mojo rejects a ternary on the types themselves
+(`UInt64 if is_64bit() else UInt32`), but a ternary on `DType` values is
+accepted, so one comptime block selects the limb per target:
+
+```mojo
+comptime BASE_DT: DType = DType.uint64 if is_64bit() else DType.uint32
+comptime DOUBLE_DT: DType = DType.uint128 if is_64bit() else DType.uint64
+comptime BigBase = Scalar[BASE_DT]          # UInt64 on 64-bit
+comptime DoubleBigBase = Scalar[DOUBLE_DT]  # UInt128 on 64-bit
+comptime BITS: Int = 64 if is_64bit() else 32
+```
+
+`UInt128` `*`, `//`, `%`, `>>`, `&` all compute correctly; the `u128 ÷ u64`
+divide is software-emulated on arm64 but gives the right answer, same as
+num-bigint. The aliasing is trivial. The migration is not: base-2^32 is
+hard-coded across `src/decimo/bigint/` — the `List[UInt32]` field and every
+signature, the `1 << 32` / `0xFFFF_FFFF` / `>> 32` literals, the 4×UInt32
+NEON width, `_count_leading_zeros`, the base-10 ↔ base-2^k chunking in
+`from_string` / `to_string` (9 vs 19 digits per limb, the hard part), and
+`BigInt10` bit-layout interop. If I do it, I will first introduce
+`BigBase` / `DoubleBigBase` / `BITS` / `BASE` / `MASK` and replace every
+literal while keeping the limb at uint32, a pure and testable refactor with
+no behaviour change, then flip to uint64 and fix the base-conversion and
+SIMD fallout behind the test suite.
+
+**T-W1 — base-2^64 limbs. Open, low priority, unproven.**
+
+### Plan
+
+| Label | Item                                             | Status                                |
+| ----- | ------------------------------------------------ | ------------------------------------- |
+| T-D1  | Remove redundant `.copy()` /                     | OPEN — the floor_divide outlier (P0)  |
+|       | normalise allocs in divide                       |                                       |
+| T-D2  | Hoist Knuth-D inner loop;                        | OPEN — Lesson 7 (two buffers)         |
+|       | branchless offset-carry                          |                                       |
+| T-D3  | Re-tune `CUTOFF_BURNIKEL_ZIEGLER`                | OPEN — pair with the BigUInt todo     |
+| T-T1  | Lower to_string D&C entry threshold              | OPEN — after T-D1 / T-D2              |
+| T-M1  | Toom-3 multiply above ~256 words                 | OPEN — unblocks multiply, from_string |
+| T-M2  | SIMD partial-product accumulation in school base | OPEN                                  |
+| T-P1  | `square()` plus inplace loop for power           | OPEN                                  |
+| T-A1  | add/sub dispatch reorder                         | OPEN                                  |
+| T-SH1 | Pre-size the shift result buffer                 | OPEN                                  |
+| T-W1  | Base-2^64 limbs                                  | OPEN — unproven, low priority         |
+| T-D4  | Reciprocal-Newton divide                         | DEFERRED — needs Toom-3 (Lesson 9)    |
diff --git a/pixi.toml b/pixi.toml
index f59fe203..8de97ec2 100644
--- a/pixi.toml
+++ b/pixi.toml
@@ -64,7 +64,7 @@ tf = "clear && pixi run testfloat"
 
 # quick build & run for a single BigFloat-using .mojo file
 # Usage: pixi run bf path/to/file.mojo
-bf = { cmd = "bash examples/run_bigfloat.sh", description = "Build & run a single BigFloat .mojo file (output binary in temp/)" }
+bf = {cmd = "bash examples/run_bigfloat.sh", description = "Build & run a single BigFloat .mojo file (output binary in temp/)"}
 
 # bench
 bench = "pixi run package && bash benches/run_bench.sh"
@@ -73,8 +73,8 @@ bench = "pixi run package && bash benches/run_bench.sh"
 bdec_debug = """clear && pixi run package && cd benches/bigdecimal \
 &&pixi run mojo run -I ../ -D ASSERT=all bench.mojo && cd ../.. \
 &&pixi run clean"""
-bint_debug = """clear && pixi run package && cd benches/bigint \
-&&pixi run mojo run -I ../ -D ASSERT=all bench.mojo && cd ../.. \
+bint_debug = """clear && pixi run package && cd benches/bigint/mojo \
+&&pixi run mojo run -I ../../../src -D ASSERT=all bench.mojo --op add && cd ../../.. \
 &&pixi run clean"""
 buint_debug = """clear && pixi run package && cd benches/biguint \
 &&pixi run mojo run -I ../ -D ASSERT=all bench.mojo && cd ../.. \