diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index de40bfb8c..c22bae6a3 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -46,7 +46,7 @@ jobs: bench-base: name: Bench base needs: merge-base - runs-on: [matterlabs-ci-runner-highmem] + runs-on: [matterlabs-ci-runner-high-performance] permissions: contents: read steps: @@ -58,27 +58,81 @@ jobs: - uses: ./.github/actions/setup-toolchain - name: Compile for RISC-V working-directory: ./zksync_os + # `for-tests-benchmarking-pectra` was introduced on the PR side of + # this workflow; the merge-base may predate it. Fall back to the + # non-pectra type so bench-base can still produce baseline numbers + # for `test_precompiles` even if the merge-base lacks pectra + # support (test_pectra_precompiles / test_kzg_regression are then + # exercised only on the head side — best-effort coverage). run: | - ./dump_bin.sh --type for-tests-benchmarking + if grep -q "for-tests-benchmarking-pectra" dump_bin.sh; then + ./dump_bin.sh --type for-tests-benchmarking-pectra + else + ./dump_bin.sh --type for-tests-benchmarking + fi ./dump_bin.sh --type evm-replay-benchmarking - name: Run benchmarks shell: bash + # Profile/feature/test-filter selection adapts to the checked-out tree: + # - `bench-fast` profile: introduced on the PR; merge-base may lack + # it, in which case we fall back to `--release`. + # - `precompiles/pectra` feature + extra test functions: + # introduced together with `for-tests-benchmarking-pectra`; if the + # proving binary type is unavailable, the proving binary can't run + # BLS/BLAKE2F/KZG vectors, so we drop those tests + the feature. 
run: | + if grep -q "bench-fast" Cargo.toml; then + PROFILE="--profile bench-fast" + else + PROFILE="--release" + fi + if grep -q "for-tests-benchmarking-pectra" zksync_os/dump_bin.sh; then + PRECOMPILES_FEATURES="rig/no_print,precompiles/cycle_marker,precompiles/pectra,rig/unlimited_native" + PRECOMPILES_TESTS="test_precompiles test_pectra_precompiles test_kzg_regression" + else + PRECOMPILES_FEATURES="rig/no_print,precompiles/cycle_marker,rig/unlimited_native" + PRECOMPILES_TESTS="test_precompiles" + fi for dir in tests/instances/eth_runner/blocks/*; do blk=$(basename "$dir") + # Pass 1: default DA scheme (BlobsAndPubdataKeccak256) — full + # instrumentation (opcodes + precompiles + cycle markers). OPCODE_SAMPLES_DIR=$(pwd)/opcode_samples/base_${blk} \ OPCODE_CYCLE_SAMPLES_DIR=$(pwd)/opcode_cycles/base_${blk} \ MARKER_PATH=$(pwd)/base_block_${blk}.bench \ - cargo run --manifest-path tests/instances/eth_runner/Cargo.toml --release --features rig/no_print,rig/cycle_marker,rig/unlimited_native -- single-run --block-dir "$dir" --opcode-stats > base_block_${blk}.out + PRECOMPILE_STATS_PATH=$(pwd)/base_block_${blk}_precompile_stats.csv \ + PRECOMPILE_SAMPLES_DIR=$(pwd)/precompile_samples/base_${blk} \ + LABEL_CYCLE_SAMPLES_DIR=$(pwd)/precompile_cycles/base_${blk} \ + cargo run --manifest-path tests/instances/eth_runner/Cargo.toml $PROFILE --features rig/no_print,rig/cycle_marker,rig/unlimited_native -- single-run --block-dir "$dir" --opcode-stats > base_block_${blk}.out + # Pass 2: BlobsZKsyncOS DA scheme. Only the post-tx-op stage + # differs (the tx-loop work is identical to pass 1), so we capture + # ONLY the cycle markers here — no opcode/precompile dumps. + # Skip on merge-bases that lack BENCH_DA_SCHEME plumbing. 
+ if grep -q "BENCH_DA_SCHEME" tests/instances/eth_runner/src/single_run.rs; then + BENCH_DA_SCHEME=blobs_zksync_os \ + MARKER_PATH=$(pwd)/base_block_${blk}_blobs.bench \ + cargo run --manifest-path tests/instances/eth_runner/Cargo.toml $PROFILE --features rig/no_print,rig/cycle_marker,rig/unlimited_native -- single-run --block-dir "$dir" > base_block_${blk}_blobs.out + fi done - MARKER_PATH=$(pwd)/base_precompiles.bench cargo test --release --features rig/no_print,precompiles/cycle_marker,rig/unlimited_native -p precompiles -- test_precompiles + # Test name substring filters (Rust's harness matches if ANY substring matches): + # test_precompiles — 114 core precompile vectors (TESTS) + # test_pectra_precompiles — 6 BLAKE2F + BLS12-381 vectors (PECTRA_TESTS, gated by `precompiles/pectra` feature) + # test_kzg_regression — 1 KZG / point_evaluation vector (KZG_TESTS) + # `test_p256` (781 P256 vectors) is `#[ignore = "Too long for CI"]`; would + # need `--include-ignored` to run, which significantly lengthens CI. Tracked + # as a follow-up coverage gap. 
+ MARKER_PATH=$(pwd)/base_precompiles.bench PRECOMPILE_STATS_PATH=$(pwd)/base_precompile_stats.csv PRECOMPILE_SAMPLES_DIR=$(pwd)/base_precompile_samples LABEL_CYCLE_SAMPLES_DIR=$(pwd)/base_precompile_cycles cargo test $PROFILE --features $PRECOMPILES_FEATURES -p precompiles -- --test-threads=1 $PRECOMPILES_TESTS - uses: actions/upload-artifact@v4 with: name: bench-base-results path: | base_block_*.bench + base_block_*_blobs.bench base_block_*.out + base_block_*_blobs.out + base_block_*_precompile_stats.csv base_precompiles.bench + base_precompile_stats.csv opcode_samples/ opcode_cycles/ @@ -86,7 +140,7 @@ jobs: bench-head: name: Bench head needs: merge-base - runs-on: [matterlabs-ci-runner-highmem] + runs-on: [matterlabs-ci-runner-high-performance] permissions: contents: read steps: @@ -99,31 +153,75 @@ jobs: - uses: ./.github/actions/setup-toolchain - name: Compile for RISC-V working-directory: ./zksync_os + # `for-tests-benchmarking-pectra` was introduced on the PR side of + # this workflow; the merge-base may predate it. Fall back to the + # non-pectra type so bench-base can still produce baseline numbers + # for `test_precompiles` even if the merge-base lacks pectra + # support (test_pectra_precompiles / test_kzg_regression are then + # exercised only on the head side — best-effort coverage). run: | - ./dump_bin.sh --type for-tests-benchmarking + if grep -q "for-tests-benchmarking-pectra" dump_bin.sh; then + ./dump_bin.sh --type for-tests-benchmarking-pectra + else + ./dump_bin.sh --type for-tests-benchmarking + fi ./dump_bin.sh --type evm-replay-benchmarking - name: Run benchmarks shell: bash + # See bench-base for the rationale on profile/feature/test-filter + # detection. Mirrored here so the same fallback applies if/when the + # head also lacks one of these (defensive — usually head has them). 
run: | + if grep -q "bench-fast" Cargo.toml; then + PROFILE="--profile bench-fast" + else + PROFILE="--release" + fi + if grep -q "for-tests-benchmarking-pectra" zksync_os/dump_bin.sh; then + PRECOMPILES_FEATURES="rig/no_print,precompiles/cycle_marker,precompiles/pectra,rig/unlimited_native" + PRECOMPILES_TESTS="test_precompiles test_pectra_precompiles test_kzg_regression" + else + PRECOMPILES_FEATURES="rig/no_print,precompiles/cycle_marker,rig/unlimited_native" + PRECOMPILES_TESTS="test_precompiles" + fi for dir in tests/instances/eth_runner/blocks/*; do blk=$(basename "$dir") + # Pass 1: default DA scheme (BlobsAndPubdataKeccak256). OPCODE_SAMPLES_DIR=$(pwd)/opcode_samples/head_${blk} \ OPCODE_CYCLE_SAMPLES_DIR=$(pwd)/opcode_cycles/head_${blk} \ OPCODE_STATS_PATH=$(pwd)/head_block_${blk}_opcode_stats.csv \ MARKER_PATH=$(pwd)/head_block_${blk}.bench \ - cargo run --manifest-path tests/instances/eth_runner/Cargo.toml --release --features rig/no_print,rig/cycle_marker,rig/unlimited_native -- single-run --block-dir "$dir" --opcode-stats > head_block_${blk}.out + PRECOMPILE_STATS_PATH=$(pwd)/head_block_${blk}_precompile_stats.csv \ + PRECOMPILE_SAMPLES_DIR=$(pwd)/precompile_samples/head_${blk} \ + LABEL_CYCLE_SAMPLES_DIR=$(pwd)/precompile_cycles/head_${blk} \ + cargo run --manifest-path tests/instances/eth_runner/Cargo.toml $PROFILE --features rig/no_print,rig/cycle_marker,rig/unlimited_native -- single-run --block-dir "$dir" --opcode-stats > head_block_${blk}.out + # Pass 2: BlobsZKsyncOS — only cycle markers captured. 
+ if grep -q "BENCH_DA_SCHEME" tests/instances/eth_runner/src/single_run.rs; then + BENCH_DA_SCHEME=blobs_zksync_os \ + MARKER_PATH=$(pwd)/head_block_${blk}_blobs.bench \ + cargo run --manifest-path tests/instances/eth_runner/Cargo.toml $PROFILE --features rig/no_print,rig/cycle_marker,rig/unlimited_native -- single-run --block-dir "$dir" > head_block_${blk}_blobs.out + fi done - MARKER_PATH=$(pwd)/head_precompiles.bench cargo test --release --features rig/no_print,precompiles/cycle_marker,rig/unlimited_native -p precompiles -- test_precompiles + # See bench-base step for filter-substring rationale. + MARKER_PATH=$(pwd)/head_precompiles.bench PRECOMPILE_STATS_PATH=$(pwd)/head_precompile_stats.csv PRECOMPILE_SAMPLES_DIR=$(pwd)/head_precompile_samples LABEL_CYCLE_SAMPLES_DIR=$(pwd)/head_precompile_cycles cargo test $PROFILE --features $PRECOMPILES_FEATURES -p precompiles -- --test-threads=1 $PRECOMPILES_TESTS - uses: actions/upload-artifact@v4 with: name: bench-head-results path: | head_block_*.bench + head_block_*_blobs.bench head_block_*.out + head_block_*_blobs.out head_block_*_opcode_stats.csv + head_block_*_precompile_stats.csv head_precompiles.bench + head_precompile_stats.csv + head_precompile_samples/ + head_precompile_cycles/ opcode_samples/ opcode_cycles/ + precompile_samples/ + precompile_cycles/ # Compare base and head results, post comment. compare: @@ -158,33 +256,103 @@ jobs: # Move all files to the workspace root so scripts find them by name. cp base-results/base_block_*.bench base-results/base_block_*.out base-results/base_precompiles.bench . 2>/dev/null || true cp head-results/head_block_*.bench head-results/head_block_*.out head-results/head_precompiles.bench . 2>/dev/null || true + cp base-results/base_precompile_stats.csv . 2>/dev/null || true + cp head-results/head_precompile_stats.csv . 2>/dev/null || true + cp base-results/base_block_*_precompile_stats.csv . 2>/dev/null || true + cp head-results/head_block_*_precompile_stats.csv . 
2>/dev/null || true cp head-results/head_block_*_opcode_stats.csv . 2>/dev/null || true + # Precompile per-execution samples + cycles are head-only (base + # currently has no per-block tracer wiring); the comparison join + # consumes only the head-side data. + cp -r head-results/head_precompile_samples . 2>/dev/null || true + cp -r head-results/head_precompile_cycles . 2>/dev/null || true # Merge opcode_samples and opcode_cycles directories mkdir -p opcode_samples opcode_cycles cp -r base-results/opcode_samples/* opcode_samples/ 2>/dev/null || true cp -r head-results/opcode_samples/* opcode_samples/ 2>/dev/null || true cp -r base-results/opcode_cycles/* opcode_cycles/ 2>/dev/null || true cp -r head-results/opcode_cycles/* opcode_cycles/ 2>/dev/null || true + # Per-block precompile samples and cycles (head only). + mkdir -p precompile_samples precompile_cycles + cp -r head-results/precompile_samples/* precompile_samples/ 2>/dev/null || true + cp -r head-results/precompile_cycles/* precompile_cycles/ 2>/dev/null || true - name: Generate comparison shell: bash id: comparison run: | mkdir -p bench_results - pairs="" + # Build three separate pair lists so the resulting PR comment has a + # clear top-level structure: + # - Headline: `process_block` per (block, DA scheme) — the rows + # reviewers should always see. + # - Sub-phases: `system_init`, `run_tx_loop`, `da_commitment`, + # `state_commitment_update`, `blob_versioned_hash`. + # Useful when something + # regresses inside one of these stages; otherwise + # visual debt → collapsed under
<details>. + # - Precompiles bench: the synthetic test-crate workload, which + # expands to ~30 labels — collapsed under <details>
. + headline_pairs="" + subphase_pairs="" + # The default DA scheme run (BlobsAndPubdataKeccak256) gets all four + # sub-phases. The BlobsZKsyncOS pass only differs in the post-tx-op + # stage (the tx loop is identical), so we surface only the rows that + # actually change: `da_commitment`, `state_commitment_update`, and `blob_versioned_hash`. + subphase_symbols_keccak="system_init run_tx_loop da_commitment state_commitment_update" + subphase_symbols_blobs="da_commitment state_commitment_update blob_versioned_hash" + add_pair() { + local list_var="$1"; local entry="$2" + if [ -z "${!list_var}" ]; then + eval "$list_var=\$entry" + else + eval "$list_var=\"\${$list_var},\$entry\"" + fi + } for dir in tests/instances/eth_runner/blocks/*; do blk=$(basename "$dir") python3 bench_scripts/parse_opcodes.py base_block_${blk}.out bench_results/base_block_${blk}.csv bench_results/base_block_${blk}.png python3 bench_scripts/parse_opcodes.py head_block_${blk}.out bench_results/head_block_${blk}.csv bench_results/head_block_${blk}.png - if [ -z "$pairs" ]; then - pairs="(\"block_${blk}\", \"base_block_${blk}.bench\", \"head_block_${blk}.bench\", \"process_block\")" - else - pairs="${pairs},(\"block_${blk}\", \"base_block_${blk}.bench\", \"head_block_${blk}.bench\", \"process_block\")" + add_pair headline_pairs "(\"block_${blk} (keccak DA)\", \"base_block_${blk}.bench\", \"head_block_${blk}.bench\", \"process_block\")" + for sym in $subphase_symbols_keccak; do + add_pair subphase_pairs "(\"block_${blk} (keccak DA)\", \"base_block_${blk}.bench\", \"head_block_${blk}.bench\", \"${sym}\")" + done + # When the merge-base predates `BENCH_DA_SCHEME` plumbing the + # bench-base job emits no blobs `.bench` file. Fall back to + # comparing the head's blobs file against itself so the absolute + # values are still visible in the PR comment (deltas will read 0% + # — fine until the next PR cycles after merge). 
+ if [ -f "head_block_${blk}_blobs.bench" ]; then + if [ -f "base_block_${blk}_blobs.bench" ]; then + base_blob="base_block_${blk}_blobs.bench" + else + base_blob="head_block_${blk}_blobs.bench" + fi + add_pair headline_pairs "(\"block_${blk} (blobs DA)\", \"${base_blob}\", \"head_block_${blk}_blobs.bench\", \"process_block\")" + for sym in $subphase_symbols_blobs; do + add_pair subphase_pairs "(\"block_${blk} (blobs DA)\", \"${base_blob}\", \"head_block_${blk}_blobs.bench\", \"${sym}\")" + done fi done - pairs="${pairs},(\"precompiles\", \"base_precompiles.bench\", \"head_precompiles.bench\")" - # Save comparison to file (for artifact + comment workflow fallback) - python3 bench_scripts/compare_bench.py "[${pairs}]" > bench_results/comparison.md + precompiles_pair="(\"precompiles\", \"base_precompiles.bench\", \"head_precompiles.bench\")" + # Headline section: process_block per (block, DA scheme). + echo "## Block-level effective cycles" > bench_results/comparison.md + echo "" >> bench_results/comparison.md + python3 bench_scripts/compare_bench.py --no-title "[${headline_pairs}]" >> bench_results/comparison.md + # Block sub-phases collapsed under
<details>. + echo "" >> bench_results/comparison.md + echo "<details><summary>Block-level sub-phases</summary>" >> bench_results/comparison.md + echo "" >> bench_results/comparison.md + python3 bench_scripts/compare_bench.py --no-title --sort-by-symbol "[${subphase_pairs}]" >> bench_results/comparison.md + echo "" >> bench_results/comparison.md + echo "</details>" >> bench_results/comparison.md + # Synthetic precompiles test-crate bench collapsed under <details>. + echo "" >> bench_results/comparison.md + echo "<details><summary>Precompiles test-crate bench (synthetic workload, all labels)</summary>" >> bench_results/comparison.md + echo "" >> bench_results/comparison.md + python3 bench_scripts/compare_bench.py --no-title "[${precompiles_pair}]" >> bench_results/comparison.md + echo "" >> bench_results/comparison.md + echo "</details>
" >> bench_results/comparison.md # Collect file lists for aggregated per-opcode comparison stats_args="" cycle_args="" @@ -199,15 +367,74 @@ jobs: stats_sample_args="$stats_sample_args $(pwd)/opcode_samples/base_${blk} $(pwd)/opcode_samples/head_${blk}" cycle_sample_args="$cycle_sample_args $(pwd)/opcode_samples/base_${blk} $(pwd)/opcode_cycles/base_${blk} $(pwd)/opcode_samples/head_${blk} $(pwd)/opcode_cycles/head_${blk}" done + # Per-opcode changes — both scripts emit nothing if nothing moved. + echo "" >> bench_results/comparison.md + echo "## Per-opcode" >> bench_results/comparison.md # Aggregated per-opcode EVM stats (gas/native/ratios) - python3 bench_scripts/compare_opcode_stats.py $stats_args \ + if ! python3 bench_scripts/compare_opcode_stats.py $stats_args \ --sample-dirs $stats_sample_args \ - >> bench_results/comparison.md 2>/dev/null || true + >> bench_results/comparison.md; then + echo "" >> bench_results/comparison.md + echo "_Per-opcode gas/native diff generation failed; see CI logs._" >> bench_results/comparison.md + fi # Aggregated per-opcode RISC-V cycles (and cycles/gas ratios) - python3 bench_scripts/compare_opcode_cycles.py $cycle_args \ + if ! python3 bench_scripts/compare_opcode_cycles.py $cycle_args \ --gas-stats $gas_args \ --sample-dirs $cycle_sample_args \ - >> bench_results/comparison.md 2>/dev/null || true + >> bench_results/comparison.md; then + echo "" >> bench_results/comparison.md + echo "_Per-opcode cycles diff generation failed; see CI logs._" >> bench_results/comparison.md + fi + # Per-execution precompile join inputs: aggregate the per-block + # tracer sample dirs and cycle sample dirs into positional pairs + # for `join_precompile_samples.py`. `--opcode-samples-dir` + # provides gas/native for synthetic precompile entries (currently + # `keccak` sourced from `SHA3.samples`). 
+ join_pairs="head_precompile_samples head_precompile_cycles" + join_bench_args="" + join_opcode_args="" + if [ -f head_precompiles.bench ]; then + join_bench_args="--bench-file head_precompiles.bench" + fi + # Test-crate run dumps opcode samples to a flat dir; per-block runs + # dump under opcode_samples/head_${blk}. Pass the flat dir as the + # first --opcode-samples-dir to align with the first join pair. + if [ -d head_precompile_samples ]; then + join_opcode_args="--opcode-samples-dir opcode_samples" + fi + for dir in tests/instances/eth_runner/blocks/*; do + blk=$(basename "$dir") + if [ -d "precompile_samples/head_${blk}" ] && [ -d "precompile_cycles/head_${blk}" ]; then + join_pairs="$join_pairs precompile_samples/head_${blk} precompile_cycles/head_${blk}" + # Each --bench-file / --opcode-samples-dir is matched positionally + # to its (tracer_dir, cycles_dir) pair. + if [ -f "head_block_${blk}.bench" ]; then + join_bench_args="$join_bench_args --bench-file head_block_${blk}.bench" + else + join_bench_args="$join_bench_args --bench-file /dev/null" + fi + if [ -d "opcode_samples/head_${blk}" ]; then + join_opcode_args="$join_opcode_args --opcode-samples-dir opcode_samples/head_${blk}" + else + join_opcode_args="$join_opcode_args --opcode-samples-dir /dev/null" + fi + fi + done + # Per-execution precompile cycles/gas + native/gas (joined), + # aggregated across the test crate + all block benchmarks. + echo "" >> bench_results/comparison.md + echo "## Per-precompile" >> bench_results/comparison.md + echo "" >> bench_results/comparison.md + echo "
<details><summary>Per-precompile per-execution ratios (head)</summary>" >> bench_results/comparison.md + echo "" >> bench_results/comparison.md + echo '```' >> bench_results/comparison.md + if ! python3 bench_scripts/join_precompile_samples.py $join_pairs $join_bench_args $join_opcode_args --summary \ + >> bench_results/comparison.md; then + echo "(per-execution ratios generation failed; see CI logs)" >> bench_results/comparison.md + fi + echo '```' >> bench_results/comparison.md + echo "" >> bench_results/comparison.md + echo "</details>
" >> bench_results/comparison.md # Also write to step output for direct comment EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64) echo "result<<$EOF" >> $GITHUB_OUTPUT @@ -217,17 +444,20 @@ jobs: - name: Collect per-opcode artifacts shell: bash run: | + # These produce supplementary CSV artifacts (not PR-comment + # content). On failure we let the job continue (`|| true`) but + # send stderr to the log so failures are visible during debug. for dir in tests/instances/eth_runner/blocks/*; do blk=$(basename "$dir") # Combined gas+native+cycles stats CSV python3 bench_scripts/join_opcode_stats.py \ head_block_${blk}.out head_block_${blk}.bench \ - --csv bench_results/head_block_${blk}_joined_stats.csv 2>/dev/null || true + --csv bench_results/head_block_${blk}_joined_stats.csv || true # Per-execution joined CSVs if [ -d "opcode_samples/head_${blk}" ] && [ -d "opcode_cycles/head_${blk}" ]; then python3 bench_scripts/join_samples.py \ opcode_samples/head_${blk} opcode_cycles/head_${blk} \ - --out-dir bench_results/per_opcode/block_${blk} 2>/dev/null || true + --out-dir bench_results/per_opcode/block_${blk} || true fi done # Generate visualization charts diff --git a/Cargo.toml b/Cargo.toml index 6b163d8a9..a6b87f7f7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -115,6 +115,28 @@ lto = true codegen-units = 1 debug = true +# Fast-compile variant used by the bench CI for in-workspace bench +# targets — specifically `cargo test -p precompiles --profile bench-fast`. +# `tests/instances/eth_runner/` is excluded from this workspace (see +# `workspace.exclude` above), so its `cargo run --manifest-path …` uses +# the duplicate `[profile.bench-fast]` defined in +# `tests/instances/eth_runner/Cargo.toml`; keep the two in sync. +# Runtime perf of these driver binaries doesn't affect measurements +# (cycle counts come from the RISC-V simulator), so disabling fat LTO +# and parallelizing codegen cuts "Run benchmarks" compile time by a +# large factor. 
The RISC-V proving binary is yet another workspace at +# `zksync_os/Cargo.toml` and uses its own `[profile.release]` — also +# unaffected. +# NOTE: the literal string `bench-fast` is used as a `grep -q` fallback +# target by `.github/workflows/bench.yml` — if this profile name is +# changed, update the workflow too. +[profile.bench-fast] +inherits = "release" +opt-level = 3 +lto = false +codegen-units = 16 +debug = false + [patch.crates-io] #zksync_os_evm_errors = { path = "../zksync-os-interface/crates/evm-errors" } #zksync_os_interface = { path = "../zksync-os-interface/crates/interface" } diff --git a/basic_bootloader/src/bootloader/block_flow/ethereum/post_tx_op_proving.rs b/basic_bootloader/src/bootloader/block_flow/ethereum/post_tx_op_proving.rs index ea0397607..2985faf34 100644 --- a/basic_bootloader/src/bootloader/block_flow/ethereum/post_tx_op_proving.rs +++ b/basic_bootloader/src/bootloader/block_flow/ethereum/post_tx_op_proving.rs @@ -119,9 +119,9 @@ where &initial_state_commitment ); - // // 3. Verify/apply reads and writes + // 3. Verify/apply reads and writes — state-tree merkle commit. let mut updated_state_commitment = initial_state_commitment; - cycle_marker::wrap!("verify_and_apply_batch", { + cycle_marker::wrap!("state_commitment_update", { io.update_commitment( Some(&mut updated_state_commitment), &mut logger, diff --git a/basic_bootloader/src/bootloader/block_flow/ethereum/post_tx_op_sequencing.rs b/basic_bootloader/src/bootloader/block_flow/ethereum/post_tx_op_sequencing.rs index 9a2341f33..3c77410ef 100644 --- a/basic_bootloader/src/bootloader/block_flow/ethereum/post_tx_op_sequencing.rs +++ b/basic_bootloader/src/bootloader/block_flow/ethereum/post_tx_op_sequencing.rs @@ -121,8 +121,8 @@ where // Events result_keeper.events(io.events_iterator()); - // // 3. Verify/apply reads and writes - cycle_marker::wrap!("verify_and_apply_batch", { + // 3. 
Verify/apply reads and writes + cycle_marker::wrap!("state_commitment_update", { io.update_commitment(None, &mut logger, result_keeper); }); diff --git a/basic_bootloader/src/bootloader/block_flow/zk/post_tx_op/post_tx_op_proving_multiblock_batch.rs b/basic_bootloader/src/bootloader/block_flow/zk/post_tx_op/post_tx_op_proving_multiblock_batch.rs index ef685ffa6..f3893cf59 100644 --- a/basic_bootloader/src/bootloader/block_flow/zk/post_tx_op/post_tx_op_proving_multiblock_batch.rs +++ b/basic_bootloader/src/bootloader/block_flow/zk/post_tx_op/post_tx_op_proving_multiblock_batch.rs @@ -96,17 +96,20 @@ where da_commitment_scheme ); } - write_pubdata( - batch_data - .da_commitment_generator - .as_mut() - .unwrap() - .as_mut(), - result_keeper, - block_hash, - metadata.block_timestamp(), - &mut io, - ); + // See `post_tx_op_proving_singleblock_batch.rs` for the rationale. + cycle_marker::wrap!("da_commitment", { + write_pubdata( + batch_data + .da_commitment_generator + .as_mut() + .unwrap() + .as_mut(), + result_keeper, + block_hash, + metadata.block_timestamp(), + &mut io, + ); + }); io.logs_storage .apply_to_array_vec(&mut batch_data.logs_storage); @@ -142,8 +145,8 @@ where last_block_timestamp, }; - // 3. Verify/apply reads and writes - cycle_marker::wrap!("verify_and_apply_batch", { + // 3. Verify/apply reads and writes — state-tree merkle commit. 
+ cycle_marker::wrap!("state_commitment_update", { IOTeardown::<_>::update_commitment( &mut io, Some(&mut state_commitment), diff --git a/basic_bootloader/src/bootloader/block_flow/zk/post_tx_op/post_tx_op_proving_singleblock_batch.rs b/basic_bootloader/src/bootloader/block_flow/zk/post_tx_op/post_tx_op_proving_singleblock_batch.rs index e3728b873..94d81e6b1 100644 --- a/basic_bootloader/src/bootloader/block_flow/zk/post_tx_op/post_tx_op_proving_singleblock_batch.rs +++ b/basic_bootloader/src/bootloader/block_flow/zk/post_tx_op/post_tx_op_proving_singleblock_batch.rs @@ -88,13 +88,22 @@ where let mut da_commitment_generator = da_commitment_generator_from_scheme(io.da_commitment_scheme.unwrap(), A::default()) .unwrap(); - write_pubdata( - da_commitment_generator.as_mut(), - result_keeper, - block_hash, - metadata.block_timestamp(), - &mut io, - ); + // For keccak DA (`BlobsAndPubdataKeccak256`), `write_pubdata` streams + // bytes through `Keccak256CommitmentGenerator`, which absorbs them + // into the keccak state — this is where the bulk of keccak + // delegations fire on the DA-commit path. For blob DA + // (`BlobsZKsyncOS`) the same call just appends to a buffer (no + // hashing yet); the actual blob KZG work happens in `.finalize()` + // below and is already captured by the `blob_versioned_hash` marker. + cycle_marker::wrap!("da_commitment", { + write_pubdata( + da_commitment_generator.as_mut(), + result_keeper, + block_hash, + metadata.block_timestamp(), + &mut io, + ); + }); let (multichain_root, settlement_layer_chain_id) = read_batch_context_inputs(&mut io); @@ -152,8 +161,10 @@ where chain_state_commitment_before ); - // update state commitment - cycle_marker::wrap!("verify_and_apply_batch", { + // update state commitment — this is the state-tree merkle commit + // (Blake-heavy). Distinct from `da_commitment` (keccak/blob over + // pubdata) and `blob_versioned_hash` (KZG per blob). 
+ cycle_marker::wrap!("state_commitment_update", { IOTeardown::<_>::update_commitment( &mut io, Some(&mut state_commitment), diff --git a/basic_bootloader/src/bootloader/block_flow/zk/post_tx_op/post_tx_op_sequencing.rs b/basic_bootloader/src/bootloader/block_flow/zk/post_tx_op/post_tx_op_sequencing.rs index 087c06118..b0a1ff82b 100644 --- a/basic_bootloader/src/bootloader/block_flow/zk/post_tx_op/post_tx_op_sequencing.rs +++ b/basic_bootloader/src/bootloader/block_flow/zk/post_tx_op/post_tx_op_sequencing.rs @@ -73,15 +73,20 @@ where result_keeper.logs(io.logs_storage.messages_ref_iter()); result_keeper.events(io.events_storage.events_ref_iter()); - write_pubdata( - &mut NopCommitmentGenerator, - result_keeper, - block_hash, - metadata.block_timestamp(), - &mut io, - ); + // Sequencing-mode post-op uses NopCommitmentGenerator (no DA work), + // but we still mark `da_commitment` for parity with the proving + // paths so the bench label set is consistent across STFs. + cycle_marker::wrap!("da_commitment", { + write_pubdata( + &mut NopCommitmentGenerator, + result_keeper, + block_hash, + metadata.block_timestamp(), + &mut io, + ); + }); - cycle_marker::wrap!("verify_and_apply_batch", { + cycle_marker::wrap!("state_commitment_update", { io.update_commitment(None, &mut logger, result_keeper); }); Ok(()) diff --git a/basic_system/src/system_functions/bls12_381/pairing.rs b/basic_system/src/system_functions/bls12_381/pairing.rs index 30b45f70f..d843deca6 100644 --- a/basic_system/src/system_functions/bls12_381/pairing.rs +++ b/basic_system/src/system_functions/bls12_381/pairing.rs @@ -1,5 +1,6 @@ use super::*; use alloc::vec::Vec; +use crypto::ark_ec::AffineRepr; use crypto::{ark_ec::pairing::Pairing, bls12_381::curves::Bls12_381}; use zk_ee::{ out_of_return_memory, @@ -78,25 +79,159 @@ fn bls12_381_pairing_as_system_function_inner< .try_into() .unwrap(), )?; + // e(O, Q) = e(P, O) = 1 in the target field, so degenerate pairs do not + // affect the multi-pairing product. 
Skip them after subgroup validation + // to save the per-pair Miller-loop precomputation that dominates the + // cost on Pectra degenerate inputs. + if g1.is_zero() || g2.is_zero() { + continue; + } g1_points.push(g1); g2_points.push(g2); } - let pairing_result = ::multi_pairing(g1_points, g2_points); output .try_extend([0u8; 31]) .map_err(|_| out_of_return_memory!())?; use crypto::ark_ff::Field; - if pairing_result.0 == ::TargetField::ONE { - output - .try_extend([1u8]) - .map_err(|_| out_of_return_memory!())?; + let success = if g1_points.is_empty() { + // Empty product equals the identity in the target field. + true } else { - output - .try_extend([0u8]) - .map_err(|_| out_of_return_memory!())?; - } + let pairing_result = ::multi_pairing(g1_points, g2_points); + pairing_result.0 == ::TargetField::ONE + }; + + output + .try_extend([success as u8]) + .map_err(|_| out_of_return_memory!())?; Ok(()) } + +#[cfg(test)] +mod tests { + use super::*; + use core::ops::Neg; + use crypto::bls12_381::eip2537::{serialize_g1_bytes, serialize_g2_bytes}; + use zk_ee::reference_implementations::{BaseResources, DecreasingNative}; + use zk_ee::system::Resource; + + fn encode_g1(point: G1Affine) -> [u8; G1_SERIALIZATION_LEN] { + let mut buf = [0u8; G1_SERIALIZATION_LEN]; + serialize_g1_bytes(point, &mut buf); + buf + } + + fn encode_g2(point: G2Affine) -> [u8; G2_SERIALIZATION_LEN] { + let mut buf = [0u8; G2_SERIALIZATION_LEN]; + serialize_g2_bytes(point, &mut buf); + buf + } + + fn encode_pair(g1: G1Affine, g2: G2Affine) -> [u8; BLS12_381_PAIR_LEN] { + let mut buf = [0u8; BLS12_381_PAIR_LEN]; + buf[..G1_SERIALIZATION_LEN].copy_from_slice(&encode_g1(g1)); + buf[G1_SERIALIZATION_LEN..].copy_from_slice(&encode_g2(g2)); + buf + } + + fn run(input: &[u8]) -> Vec { + let allocator = std::alloc::Global; + let mut resource = as Resource>::FORMAL_INFINITE; + let mut dst: Vec = Vec::new(); + Bls12381PairingCheckPrecompile::execute(input, &mut dst, &mut resource, allocator) + 
.expect("precompile should succeed on well-formed input"); + dst + } + + fn expect_check(input: &[u8], expected_true: bool) { + let dst = run(input); + let mut expected = [0u8; 32]; + expected[31] = expected_true as u8; + assert_eq!(dst.as_slice(), &expected[..]); + } + + #[test] + fn single_pair_both_infinity_returns_true() { + let input = [0u8; BLS12_381_PAIR_LEN]; + expect_check(&input, true); + } + + #[test] + fn single_pair_g1_infinity_returns_true() { + let mut input = [0u8; BLS12_381_PAIR_LEN]; + input[G1_SERIALIZATION_LEN..].copy_from_slice(&encode_g2(G2Affine::generator())); + expect_check(&input, true); + } + + #[test] + fn single_pair_g2_infinity_returns_true() { + let mut input = [0u8; BLS12_381_PAIR_LEN]; + input[..G1_SERIALIZATION_LEN].copy_from_slice(&encode_g1(G1Affine::generator())); + expect_check(&input, true); + } + + #[test] + fn many_infinity_pairs_return_true() { + let input = vec![0u8; 7 * BLS12_381_PAIR_LEN]; + expect_check(&input, true); + } + + #[test] + fn nontrivial_pair_returns_false_and_infinity_does_not_mask_it() { + // e(G1, G2) is the BLS12-381 generator pairing, which is not 1. + let nontrivial = encode_pair(G1Affine::generator(), G2Affine::generator()); + expect_check(&nontrivial, false); + + // Appending degenerate pairs must not flip the result to true. 
+ let mut with_inf = nontrivial.to_vec(); + with_inf.extend_from_slice(&[0u8; BLS12_381_PAIR_LEN]); + expect_check(&with_inf, false); + + let mut prefixed = vec![0u8; BLS12_381_PAIR_LEN]; + prefixed.extend_from_slice(&nontrivial); + expect_check(&prefixed, false); + } + + #[test] + fn balanced_pair_returns_true_with_or_without_infinity_padding() { + // e(G1, G2) * e(-G1, G2) = e(G1, G2) * e(G1, G2)^{-1} = 1 + let g1 = G1Affine::generator(); + let g2 = G2Affine::generator(); + let balanced_a = encode_pair(g1, g2); + let balanced_b = encode_pair(g1.neg(), g2); + + let mut balanced = balanced_a.to_vec(); + balanced.extend_from_slice(&balanced_b); + expect_check(&balanced, true); + + // Interleaving degenerate pairs must keep the result true. + let mut interleaved = vec![0u8; BLS12_381_PAIR_LEN]; + interleaved.extend_from_slice(&balanced_a); + interleaved.extend_from_slice(&[0u8; BLS12_381_PAIR_LEN]); + interleaved.extend_from_slice(&balanced_b); + interleaved.extend_from_slice(&[0u8; BLS12_381_PAIR_LEN]); + expect_check(&interleaved, true); + } + + #[test] + fn malformed_nonzero_encoding_is_still_rejected() { + // A G1 input where the y-coordinate is forced to zero with a non-zero x + // is not on the curve and must not be accepted as the point at infinity. + // This guards against any future refactor that filters before parsing. + let mut input = [0u8; BLS12_381_PAIR_LEN]; + // x = 1 in big-endian, padded to 48 bytes then to the 64-byte slot. + input[G1_SERIALIZATION_LEN - 1] = 1; + // y stays zero. G2 stays at infinity (irrelevant once G1 parse fails). + let allocator = std::alloc::Global; + let mut resource = as Resource>::FORMAL_INFINITE; + let mut dst: Vec = Vec::new(); + let err = + Bls12381PairingCheckPrecompile::execute(&input, &mut dst, &mut resource, allocator) + .expect_err("invalid G1 encoding must be rejected"); + // Sanity: we got an error rather than silently treating it as infinity. 
+ let _ = err; + } +} diff --git a/bench_scripts/bench.sh b/bench_scripts/bench.sh index 498e825b0..4bee4e55a 100755 --- a/bench_scripts/bench.sh +++ b/bench_scripts/bench.sh @@ -45,11 +45,16 @@ run_block() { local block_samples_dir="$output_dir/opcode_samples/block_${blk}" local block_cycles_dir="$output_dir/opcode_cycles/block_${blk}" local block_stats_path="$output_dir/opcode_stats/block_${blk}.csv" + local precompile_stats_path="$output_dir/block_${blk}_precompile_stats.csv" + local precompile_samples_dir="$output_dir/precompile_samples/block_${blk}" + local precompile_cycles_dir="$output_dir/precompile_cycles/block_${blk}" rm -rf "$block_samples_dir" "$block_cycles_dir" - rm -f "$block_stats_path" + rm -rf "$precompile_samples_dir" "$precompile_cycles_dir" + rm -f "$block_stats_path" "$precompile_stats_path" mkdir -p "$output_dir/opcode_samples" "$output_dir/opcode_cycles" "$output_dir/opcode_stats" + mkdir -p "$output_dir/precompile_samples" "$output_dir/precompile_cycles" echo "==> Benchmarking block $blk..." ZKSYNC_RISC_V_RUN=true \ @@ -57,6 +62,9 @@ run_block() { OPCODE_CYCLE_SAMPLES_DIR="$block_cycles_dir" \ OPCODE_STATS_PATH="$block_stats_path" \ MARKER_PATH="$output_dir/block_${blk}.bench" \ + PRECOMPILE_STATS_PATH="$precompile_stats_path" \ + PRECOMPILE_SAMPLES_DIR="$precompile_samples_dir" \ + LABEL_CYCLE_SAMPLES_DIR="$precompile_cycles_dir" \ cargo run --manifest-path "$ETH_RUNNER_MANIFEST" \ --release -j 3 \ --features "$FEATURES" \ @@ -67,9 +75,21 @@ run_block() { run_precompiles() { local output_dir="$1" + # Use a dedicated sub-namespace so we don't clobber per-block dirs that + # `run_all_blocks` already wrote under $output_dir/precompile_{samples,cycles}/block_*. + local samples_dir="$output_dir/precompile_samples/test_precompiles" + local cycles_dir="$output_dir/precompile_cycles/test_precompiles" + + # Clean only our subdir so per-block artifacts survive. 
+ rm -rf "$samples_dir" "$cycles_dir" + mkdir -p "$samples_dir" "$cycles_dir" + echo "==> Benchmarking precompiles..." ZKSYNC_RISC_V_RUN=true \ MARKER_PATH="$output_dir/precompiles.bench" \ + PRECOMPILE_STATS_PATH="$output_dir/precompile_stats.csv" \ + PRECOMPILE_SAMPLES_DIR="$samples_dir" \ + LABEL_CYCLE_SAMPLES_DIR="$cycles_dir" \ cargo test --manifest-path "$PRECOMPILE_MANIFEST" \ --release -j 3 \ --features "$PRECOMPILE_FEATURES" \ @@ -77,6 +97,53 @@ run_precompiles() { > "$output_dir/precompiles.out" 2>&1 } +join_precompile_samples_run() { + local output_dir="$1" + + local pairs=() + local bench_args=() + + # Test-crate cycle bench (test_precompiles). Lives in its own subdir so + # it doesn't collide with the per-block subdirs that share the parent. + local tc_samples="$output_dir/precompile_samples/test_precompiles" + local tc_cycles="$output_dir/precompile_cycles/test_precompiles" + if [ -d "$tc_samples" ] && [ -d "$tc_cycles" ]; then + pairs+=("$tc_samples" "$tc_cycles") + if [ -f "$output_dir/precompiles.bench" ]; then + bench_args+=(--bench-file "$output_dir/precompiles.bench") + else + bench_args+=(--bench-file /dev/null) + fi + fi + + # Per-block eth_runner bench (real workloads). + for dir in "$BLOCKS_DIR"/*/; do + local blk + blk="$(basename "$dir")" + local p_samples="$output_dir/precompile_samples/block_${blk}" + local p_cycles="$output_dir/precompile_cycles/block_${blk}" + local p_bench="$output_dir/block_${blk}.bench" + if [ -d "$p_samples" ] && [ -d "$p_cycles" ]; then + pairs+=("$p_samples" "$p_cycles") + if [ -f "$p_bench" ]; then + bench_args+=(--bench-file "$p_bench") + else + bench_args+=(--bench-file /dev/null) + fi + fi + done + + if [ ${#pairs[@]} -ge 2 ]; then + echo "==> Joining precompile per-execution samples (${#pairs[@]} dirs across $((${#pairs[@]} / 2)) sources)..." 
+ python3 "$REPO_ROOT/bench_scripts/join_precompile_samples.py" \ + "${pairs[@]}" \ + "${bench_args[@]}" \ + --out-dir "$output_dir/precompile_joined" \ + --summary \ + > "$output_dir/precompile_joined_summary.txt" 2>&1 || true + fi +} + run_all_blocks() { local output_dir="$1" for dir in "$BLOCKS_DIR"/*/; do @@ -89,6 +156,7 @@ do_baseline() { build_riscv_binary run_all_blocks "$BASELINE_DIR" run_precompiles "$BASELINE_DIR" + join_precompile_samples_run "$BASELINE_DIR" echo "==> Baseline saved to $BASELINE_DIR" } @@ -97,6 +165,7 @@ do_run() { build_riscv_binary run_all_blocks "$CURRENT_DIR" run_precompiles "$CURRENT_DIR" + join_precompile_samples_run "$CURRENT_DIR" echo "==> Results saved to $CURRENT_DIR" } @@ -207,6 +276,28 @@ do_compare() { "${cycle_args[@]}" --gas-stats "${gas_args[@]}" --sample-dirs "${cycle_sample_args[@]}" \ 2>/dev/null || true fi + # Aggregate per-precompile stats across test-crate + all block benchmarks. + local precompile_stats_args=() + if [ -f "$BASELINE_DIR/precompile_stats.csv" ] && [ -f "$CURRENT_DIR/precompile_stats.csv" ]; then + precompile_stats_args+=( + "$BASELINE_DIR/precompile_stats.csv" + "$CURRENT_DIR/precompile_stats.csv" + ) + fi + for dir in "$BLOCKS_DIR"/*/; do + local blk + blk="$(basename "$dir")" + local base_csv="$BASELINE_DIR/block_${blk}_precompile_stats.csv" + local head_csv="$CURRENT_DIR/block_${blk}_precompile_stats.csv" + if [ -f "$base_csv" ] && [ -f "$head_csv" ]; then + precompile_stats_args+=("$base_csv" "$head_csv") + fi + done + if [ ${#precompile_stats_args[@]} -ge 2 ]; then + python3 "$REPO_ROOT/bench_scripts/compare_precompile_stats.py" \ + "${precompile_stats_args[@]}" \ + 2>/dev/null || true + fi } do_flamegraph() { diff --git a/bench_scripts/benchlib.py b/bench_scripts/benchlib.py new file mode 100644 index 000000000..dfc533b2f --- /dev/null +++ b/bench_scripts/benchlib.py @@ -0,0 +1,154 @@ +"""Shared helpers for bench_scripts/*. 
+ +Concentrates the formatting, percentile, and sample-loading utilities that +multiple scripts had been re-implementing slightly differently. Keep this +module dependency-free (stdlib only) so any script in `bench_scripts/` can +import it without adding a wheel. + +Effective-cycle constants live alongside the helpers and MUST stay in +lockstep with `cycle_marker/src/lib.rs::print_cycle_markers` (see +`compare_bench.py` for the unknown-delegation handling rationale). +""" + +import os + + +# Delegation IDs — must match cycle_marker/src/lib.rs::print_cycle_markers +BLAKE_DELEGATION_ID = 1991 +BIGINT_DELEGATION_ID = 1994 +KECCAK_DELEGATION_ID = 1995 + +# Effective-cycle weights — must match cycle_marker's BLAKE_DELEGATION_COEFF, +# BIGINT_DELEGATION_COEFF, KECCAK_DELEGATION_COEFF. If these drift, the +# Python-side reports will diverge from Rust-side `block_effective` and +# from the per-execution `.effective.cycles` dumps. +BLAKE_DELEGATION_COEFF = 16 +BIGINT_DELEGATION_COEFF = 4 +KECCAK_DELEGATION_COEFF = 4 + + +def median_int(values): + """Median of an iterable of integers (returns int). Empty → 0.""" + vals = sorted(values) + if not vals: + return 0 + mid = len(vals) // 2 + if len(vals) % 2 == 0: + return (vals[mid - 1] + vals[mid]) // 2 + return vals[mid] + + +def median_float(values): + """Median of an iterable of floats. Empty → None.""" + vals = sorted(values) + if not vals: + return None + mid = len(vals) // 2 + if len(vals) % 2 == 0: + return (vals[mid - 1] + vals[mid]) / 2 + return vals[mid] + + +def percentile(sorted_vals, p): + """Nearest-rank percentile (1-indexed). `sorted_vals` must already be sorted. + + Returns 0 for an empty input. + """ + if not sorted_vals: + return 0 + rank = max(1, -(-len(sorted_vals) * p // 100)) # ceiling division + return sorted_vals[min(rank, len(sorted_vals)) - 1] + + +def pct(old, new): + """Percent change `(new - old) / old * 100`. + + Returns 0 when both sides are 0, `inf` when old is 0 and new > 0. 
+ """ + if old == 0: + return 0.0 if new == 0 else float("inf") + return (new - old) / old * 100 + + +def fmt_pct(val): + """Format a percent value as ` (+1.2%)` / ` (-3.4%)`. Empty for ~0.""" + if val is None: + return "" + if val == float("inf"): + return " (new)" + if abs(val) < 0.005: + return "" + return f" ({val:+.1f}%)" + + +def fmt_val_pct(base, head): + """Format `head (+1.2%)` for a base/head integer pair.""" + return f"{head}{fmt_pct(pct(base, head))}" + + +def fmt_ratio_pct(base, head): + """Like `fmt_val_pct` but for float ratios with one decimal.""" + if base is None or head is None: + return "—" + return f"{head:.2f}{fmt_pct(pct(base, head))}" + + +def ratio(num, den): + """`num / den` for positive `den`, else 0.0.""" + return num / den if den > 0 else 0.0 + + +def safe_listdir(path): + """`os.listdir(path)` that returns `[]` on any OSError (missing path, not a directory, /dev/null, …).""" + try: + return os.listdir(path) + except OSError: + return [] + + +def load_int_samples(path): + """Load one integer per non-empty line. Used for `.cycles` / `.effective.cycles` files.""" + samples = [] + with open(path) as f: + for line in f: + line = line.strip() + if line: + samples.append(int(line)) + return samples + + +def load_gas_native_samples(path): + """Load `gas,native` per line. Used for `.samples` files emitted by tracers.""" + samples = [] + with open(path) as f: + for line in f: + line = line.strip() + if not line: + continue + parts = line.split(",") + samples.append((int(parts[0]), int(parts[1]))) + return samples + + +def list_label_files(samples_dir, raw_suffix=".cycles", effective_suffix=".effective.cycles"): + """Return `(raw_names, effective_names, opcode_to_file)` for a samples dir. + + - `raw_names`: set of labels with a `.cycles` file (NOT `.effective.cycles`). + - `effective_names`: set of labels with a `.effective.cycles` file. + - `opcode_to_file`: dict label → filename to prefer (effective when present). 
+ """ + entries = set(safe_listdir(samples_dir)) + raw_names = set() + effective_names = set() + opcode_to_file = {} + for name in entries: + if name.endswith(effective_suffix): + label = name[: -len(effective_suffix)] + effective_names.add(label) + opcode_to_file[label] = name + for name in entries: + if name.endswith(raw_suffix) and not name.endswith(effective_suffix): + label = name[: -len(raw_suffix)] + raw_names.add(label) + opcode_to_file.setdefault(label, name) + return raw_names, effective_names, opcode_to_file diff --git a/bench_scripts/compare_bench.py b/bench_scripts/compare_bench.py index 8fc52e93e..3df9f6fb6 100644 --- a/bench_scripts/compare_bench.py +++ b/bench_scripts/compare_bench.py @@ -1,10 +1,28 @@ +import os import sys import re import ast -U256BIGINTOPS_RATIO = 4 -BLAKE2ROUNDEXTENDED_RATIO = 16 -KECCAK_RATIO = 4 # TODO(EVM-1242): calibrate with actual proving benchmarks +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from benchlib import ( # noqa: E402 + BIGINT_DELEGATION_COEFF, + BIGINT_DELEGATION_ID, + BLAKE_DELEGATION_COEFF, + BLAKE_DELEGATION_ID, + KECCAK_DELEGATION_COEFF, + KECCAK_DELEGATION_ID, + pct as pct_change, # historical name +) + +# Unknown-delegation policy: when this parser computes effective from the +# raw `.bench` text it adds +1 per occurrence (coefficient = 1) for any +# delegation ID outside the weighted set (BLAKE/BIGINT/KECCAK). This is +# compare_bench's deliberate choice — it keeps unfamiliar delegation IDs +# visible in headline cycles without requiring script updates. The +# Rust-side `effective_of` helper does NOT apply this fallback because it +# operates on per-execution sample dumps restricted to the three weighted +# IDs; that's why headline `block_effective` can differ slightly from +# compare_bench.py's `Eff` column. 
def parse_cycle_markers(text): results = {} @@ -17,14 +35,40 @@ def parse_cycle_markers(text): m = re.match(r"(\w+): net cycles: (\d+), net delegations: (\{.*\})", line.strip()) if m: name = m.group(1) + # Compatibility aliases for marker-name transitions. These + # let bench-base on older merge-bases match bench-head's + # current names so the PR-comment rows pair up correctly. + # TODO: drop each alias one PR cycle after the merge-base + # SHA on `draft-0.4.0` reliably contains the new name. + # + # Alias 1: `_execution_environment` → ``. + # Introduced by feat(bench): mark user-EVM-EE keccak/ecrecover. + # The outer cycles INCLUDE the inner ones, so collapsing + # onto the base name + max-fold picks the larger value — + # avoiding double rows in the PR comment. + # Alias 2: `verify_and_apply_batch` → `state_commitment_update`. + # Introduced when the marker was renamed (the old name was + # misleading — it always wrapped only the state-tree commit). + if name.endswith("_execution_environment"): + name = name[: -len("_execution_environment")] + elif name == "verify_and_apply_batch": + name = "state_commitment_update" raw = int(m.group(2)) delegs = ast.literal_eval(m.group(3)) - blake = delegs.get(1991, 0) - bigint = delegs.get(1994, 0) - keccak = delegs.get(1995, 0) - weighted = blake * BLAKE2ROUNDEXTENDED_RATIO + bigint * U256BIGINTOPS_RATIO + keccak * KECCAK_RATIO - weighted += sum(v for k, v in delegs.items() if k not in (1991, 1994, 1995)) + blake = delegs.get(BLAKE_DELEGATION_ID, 0) + bigint = delegs.get(BIGINT_DELEGATION_ID, 0) + keccak = delegs.get(KECCAK_DELEGATION_ID, 0) + weighted = ( + blake * BLAKE_DELEGATION_COEFF + + bigint * BIGINT_DELEGATION_COEFF + + keccak * KECCAK_DELEGATION_COEFF + ) + weighted += sum( + v + for k, v in delegs.items() + if k not in (BLAKE_DELEGATION_ID, BIGINT_DELEGATION_ID, KECCAK_DELEGATION_ID) + ) eff = raw + weighted prev = results.get(name) @@ -38,18 +82,24 @@ def parse_cycle_markers(text): } return results -def 
pct_change(old, new): - if old == 0: - return float('inf') if new > 0 else 0.0 - return (new - old) / old * 100 - def main(): - if len(sys.argv) != 2: - print("Usage: python compare_bench.py '[...]'") + # `--no-title` lets the caller (e.g. bench.yml) provide its own section + # heading; useful when the same script is invoked multiple times to + # render different sub-tables under separate headings/spoilers. + # `--sort-by-symbol` groups rows by Symbol (then benchmark name) so + # all rows for the same marker line up — easier to scan when the + # table has many (benchmark × symbol) combinations like the + # block-level sub-phases view. + cli_flags = {"--no-title", "--sort-by-symbol"} + args = [a for a in sys.argv[1:] if a not in cli_flags] + emit_title = "--no-title" not in sys.argv[1:] + sort_by_symbol = "--sort-by-symbol" in sys.argv[1:] + if len(args) != 1: + print("Usage: python compare_bench.py [--no-title] [--sort-by-symbol] '[...]'") sys.exit(1) try: - benchmarks = ast.literal_eval(sys.argv[1]) + benchmarks = ast.literal_eval(args[0]) except Exception as e: print(f"Invalid input format: {e}") sys.exit(1) @@ -84,6 +134,12 @@ def main(): b = base.get(sym, {}) h = head.get(sym, {}) + # Skip symbols absent on both sides (e.g. an explicitly-requested + # block-level sub-phase that doesn't exist in this run's bench + # file would otherwise produce a noisy all-zero row). + if not b and not h: + continue + b_raw = b.get('raw', 0) h_raw = h.get('raw', 0) b_blake = b.get('blake', 0) @@ -104,8 +160,19 @@ def main(): b_eff, h_eff, pct_change(b_eff, h_eff) )) + # Skip emitting anything when there are no rows so callers wrapping the + # output in `
` don't produce an empty section. + if not rows: + return + + if sort_by_symbol: + # row[0] = benchmark name, row[1] = symbol. Stable sort on + # (symbol, name) groups all rows of the same marker together. + rows.sort(key=lambda r: (r[1], r[0])) + # Markdown table - print("### Benchmark report\n") + if emit_title: + print("### Benchmark report\n") print("| Benchmark | Symbol | Base Eff | Head Eff (%) | Base Raw | Head Raw (%) | Base Blake | Head Blake (%) | Base Bigint | Head Bigint (%) | Base Keccak | Head Keccak (%) |") print("|-----------|--------|-----------|----------------|-----------|----------------|-------------|------------------|---------------|--------------------|--------------|--------------------|") diff --git a/bench_scripts/compare_opcode_cycles.py b/bench_scripts/compare_opcode_cycles.py index f01d4a865..08b3c706a 100644 --- a/bench_scripts/compare_opcode_cycles.py +++ b/bench_scripts/compare_opcode_cycles.py @@ -23,27 +23,13 @@ import re import sys - -def median_int(values): - """Return the true median of integer samples.""" - if not values: - return 0 - sorted_vals = sorted(values) - mid = len(sorted_vals) // 2 - if len(sorted_vals) % 2 == 0: - return (sorted_vals[mid - 1] + sorted_vals[mid]) // 2 - return sorted_vals[mid] - - -def median_float(values): - """Return the true median of float samples.""" - if not values: - return None - sorted_vals = sorted(values) - mid = len(sorted_vals) // 2 - if len(sorted_vals) % 2 == 0: - return (sorted_vals[mid - 1] + sorted_vals[mid]) / 2 - return sorted_vals[mid] +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from benchlib import ( # noqa: E402 + fmt_pct, + median_float, + median_int, + pct, +) def parse_cycle_stats(filename): @@ -133,17 +119,33 @@ def load_tracer_samples(samples_dir): def load_cycle_samples(samples_dir): - """Load per-opcode cycle samples from a directory.""" + """Load per-opcode cycle samples from a directory. 
+ + Prefers `.effective.cycles` (raw + Blake/BigInt/Keccak delegation + weights, matching the block_effective formula) over `.cycles` + (raw only). Effective samples reflect true prover cost for opcodes whose + handlers delegate (SHA3, SLOAD/SSTORE, BALANCE/EXTCODE*, CALL family, + CREATE/CREATE2); raw samples undercount them. Falls back to raw per + opcode when the effective variant is absent. + """ stats = {} try: - entries = os.listdir(samples_dir) + entries = set(os.listdir(samples_dir)) except OSError: return stats + # Group by opcode: prefer .effective.cycles, fall back to .cycles. + opcode_to_file = {} for name in entries: - if not name.endswith(".cycles"): - continue - opcode = name[:-len(".cycles")] + if name.endswith(".effective.cycles"): + opcode = name[: -len(".effective.cycles")] + opcode_to_file[opcode] = name + for name in entries: + if name.endswith(".cycles") and not name.endswith(".effective.cycles"): + opcode = name[: -len(".cycles")] + opcode_to_file.setdefault(opcode, name) + + for opcode, name in opcode_to_file.items(): rows = [] with open(os.path.join(samples_dir, name)) as f: for line in f: @@ -247,20 +249,13 @@ def overlay_sampled_stats(base_stats, sampled_stats): return merged -def pct(old, new): - if old == 0: - return 0.0 if new == 0 else float("inf") - return (new - old) / old * 100 - - -def fmt_pct(val): - if abs(val) < 0.005: - return "" - return f" ({val:+.1f}%)" - - def ratio(num, den): - """Return num/den, or None if den is zero.""" + """Return num/den, or None if den is zero. + + NOTE: this differs from `benchlib.ratio` (which returns 0.0 for zero + denominator) — kept local so that downstream `fmt_ratio_pct` can + branch on None to render "n/a". 
+ """ return num / den if den > 0 else None @@ -377,20 +372,27 @@ def format_table(rows, has_gas, label=""): lines.append("") if has_gas: + # When `--sample-dirs` is supplied (the CI path), `overlay_sampled_stats` + # replaces the .bench-aggregate cycle values with per-execution + # sampled values via `load_cycle_samples`, which prefers + # `.effective.cycles` (raw + Blake/BigInt/Keccak delegation + # weights). Both cycles and cyc/gas columns are therefore effective. + # Without `--sample-dirs` (local fallback), the cycles columns come + # directly from the .bench aggregate which is raw. lines.append( - "| Opcode | Count | Med Cycles (%) | Total Cycles (%) " - "| Med Cyc/Gas (%) | Worst Cyc/Gas (%) |" + "| Opcode | Count | Med Cycles eff (%) | Total Cycles eff (%) " + "| Med Cyc/Gas eff (%) | Worst Cyc/Gas eff (%) |" ) lines.append( - "|--------|-------|----------------|------------------" - "|-----------------|-------------------|" + "|--------|-------|--------------------|----------------------" + "|---------------------|-----------------------|" ) else: lines.append( - "| Opcode | Count | Med Cycles (%) | Total Cycles (%) |" + "| Opcode | Count | Med Cycles eff (%) | Total Cycles eff (%) |" ) lines.append( - "|--------|-------|----------------|------------------|" + "|--------|-------|--------------------|----------------------|" ) # Sort by head total cycles descending (biggest cost first) diff --git a/bench_scripts/compare_opcode_stats.py b/bench_scripts/compare_opcode_stats.py index 42ed03273..492c91c2d 100644 --- a/bench_scripts/compare_opcode_stats.py +++ b/bench_scripts/compare_opcode_stats.py @@ -11,18 +11,11 @@ import os import re +import os import sys - -def median_int(values): - """Return the true median of integer samples.""" - if not values: - return 0 - sorted_vals = sorted(values) - mid = len(sorted_vals) // 2 - if len(sorted_vals) % 2 == 0: - return (sorted_vals[mid - 1] + sorted_vals[mid]) // 2 - return sorted_vals[mid] +sys.path.insert(0, 
os.path.dirname(os.path.abspath(__file__))) +from benchlib import fmt_pct, median_int, pct # noqa: E402 def parse_opcode_stats(filename): @@ -197,18 +190,6 @@ def overlay_sampled_stats(base_stats, sampled_stats): return merged -def pct(old, new): - if old == 0: - return 0.0 if new == 0 else float("inf") - return (new - old) / old * 100 - - -def fmt_pct(val): - if abs(val) < 0.005: - return "" - return f" ({val:+.1f}%)" - - def compare(base_stats, head_stats): """Return list of rows for opcodes with changed avg_gas or avg_native.""" all_opcodes = sorted(set(base_stats) | set(head_stats)) diff --git a/bench_scripts/compare_precompile_stats.py b/bench_scripts/compare_precompile_stats.py new file mode 100755 index 000000000..d60205173 --- /dev/null +++ b/bench_scripts/compare_precompile_stats.py @@ -0,0 +1,243 @@ +#!/usr/bin/env python3 +"""Compare base vs head precompile stats CSVs. + +Accepts one or more base/head CSV pairs as positional args and aggregates +stats across all of them (matching `compare_opcode_stats.py`'s shape). + +Usage: + python compare_precompile_stats.py [label] + python compare_precompile_stats.py ... [label] + +CSV columns produced by PrecompileStatsTracer::write_csv: + name,address,count,avg_gas,median_gas,min_gas,max_gas, + avg_native,median_native,min_native,max_native,native_per_gas + +Aggregation across sources (per precompile): +- `count`: sum +- `avg_gas` / `avg_native`: count-weighted mean (re-derived from totals) +- `med_gas` / `med_native`: count-weighted mean of per-source medians +- `min_gas` / `min_native`: min across sources +- `max_gas` / `max_native`: max across sources + +Exits 0 with no output if nothing changed or base CSVs are empty/absent. 
+""" + +import csv +import os +import sys + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from benchlib import fmt_pct, pct # noqa: E402 + + +def parse_csv(path): + """Return dict keyed by precompile name.""" + stats = {} + try: + with open(path) as f: + reader = csv.DictReader(f) + for row in reader: + try: + stats[row["name"]] = { + "address": row["address"], + "count": int(row["count"]), + "avg_gas": float(row["avg_gas"]), + "med_gas": int(row["median_gas"]), + "min_gas": int(row["min_gas"]), + "max_gas": int(row["max_gas"]), + "avg_native": float(row["avg_native"]), + "med_native": int(row["median_native"]), + "min_native": int(row["min_native"]), + "max_native": int(row["max_native"]), + } + except (ValueError, KeyError): + continue + except FileNotFoundError: + pass + return stats + + +def aggregate(sources): + """Combine N per-source stat dicts into one aggregate dict per precompile. + + `count`s sum; min/max are extremes; averages and medians are count-weighted. 
+ """ + combined = {} + for stats in sources: + for name, s in stats.items(): + cnt = s["count"] + if cnt <= 0: + continue + if name not in combined: + combined[name] = { + "address": s["address"], + "count": 0, + "_wt_avg_gas": 0.0, + "_wt_avg_native": 0.0, + "_wt_med_gas": 0.0, + "_wt_med_native": 0.0, + "min_gas": s["min_gas"], + "max_gas": s["max_gas"], + "min_native": s["min_native"], + "max_native": s["max_native"], + } + c = combined[name] + c["count"] += cnt + c["_wt_avg_gas"] += s["avg_gas"] * cnt + c["_wt_avg_native"] += s["avg_native"] * cnt + c["_wt_med_gas"] += s["med_gas"] * cnt + c["_wt_med_native"] += s["med_native"] * cnt + c["min_gas"] = min(c["min_gas"], s["min_gas"]) + c["max_gas"] = max(c["max_gas"], s["max_gas"]) + c["min_native"] = min(c["min_native"], s["min_native"]) + c["max_native"] = max(c["max_native"], s["max_native"]) + + for c in combined.values(): + total = c["count"] + if total > 0: + c["avg_gas"] = c["_wt_avg_gas"] / total + c["avg_native"] = c["_wt_avg_native"] / total + c["med_gas"] = round(c["_wt_med_gas"] / total) + c["med_native"] = round(c["_wt_med_native"] / total) + else: + c["avg_gas"] = 0.0 + c["avg_native"] = 0.0 + c["med_gas"] = 0 + c["med_native"] = 0 + for k in ("_wt_avg_gas", "_wt_avg_native", "_wt_med_gas", "_wt_med_native"): + del c[k] + return combined + + +def compare(base, head): + names = sorted(set(base) | set(head)) + rows = [] + for name in names: + b = base.get(name, {}) + h = head.get(name, {}) + b_max_gas = b.get("max_gas", 0) + h_max_gas = h.get("max_gas", 0) + b_max_native = b.get("max_native", 0) + h_max_native = h.get("max_native", 0) + if b_max_gas == h_max_gas and b_max_native == h_max_native: + continue + rows.append( + { + "name": name, + "address": h.get("address", b.get("address", "")), + "b_count": b.get("count", 0), + "h_count": h.get("count", 0), + "b_max_gas": b_max_gas, + "h_max_gas": h_max_gas, + "b_med_gas": b.get("med_gas", 0), + "h_med_gas": h.get("med_gas", 0), + "b_max_native": 
b_max_native, + "h_max_native": h_max_native, + "b_med_native": b.get("med_native", 0), + "h_med_native": h.get("med_native", 0), + } + ) + return rows + + +def format_table(rows, label=""): + if not rows: + return "" + lines = [] + title = "#### Precompile gas/native worst-case" + if label: + title += f" ({label})" + lines.append(title) + lines.append("") + lines.append( + "| Precompile | Address | Count | Max Gas | Med Gas | Max Native | Med Native |" + ) + lines.append( + "|------------|---------|-------|---------|---------|------------|------------|" + ) + rows.sort(key=lambda r: r["h_max_native"], reverse=True) + for r in rows: + count_s = f"{r['h_count']}" + if r["b_count"] != r["h_count"]: + count_s += fmt_pct(pct(r["b_count"], r["h_count"])) + max_gas_s = f"{r['h_max_gas']}" + fmt_pct(pct(r["b_max_gas"], r["h_max_gas"])) + med_gas_s = f"{r['h_med_gas']}" + fmt_pct(pct(r["b_med_gas"], r["h_med_gas"])) + max_native_s = f"{r['h_max_native']}" + fmt_pct( + pct(r["b_max_native"], r["h_max_native"]) + ) + med_native_s = f"{r['h_med_native']}" + fmt_pct( + pct(r["b_med_native"], r["h_med_native"]) + ) + lines.append( + f"| `{r['name']}` | `{r['address']}` | {count_s} | " + f"{max_gas_s} | {med_gas_s} | {max_native_s} | {med_native_s} |" + ) + return "\n".join(lines) + + +def main(): + args = sys.argv[1:] + if len(args) < 2: + print( + "Usage: compare_precompile_stats.py [label]\n" + " compare_precompile_stats.py " + " ... [label]", + file=sys.stderr, + ) + sys.exit(1) + + label = "" + # Backward compat: odd arg count means the last arg is a label. 
+ if len(args) % 2 == 1: + label = args.pop() + + if len(args) < 2 or len(args) % 2 != 0: + print("Error: need even number of files (base/head pairs)", file=sys.stderr) + sys.exit(1) + + base_paths = [args[j] for j in range(0, len(args), 2)] + head_paths = [args[j] for j in range(1, len(args), 2)] + base = aggregate([parse_csv(p) for p in base_paths]) + head = aggregate([parse_csv(p) for p in head_paths]) + + if not head: + # Head CSVs missing or unparsable (e.g. partial artifact). Without + # head numbers there's nothing to report. + sys.exit(0) + if not base: + # Base side has no instrumentation (typical on the PR that introduces + # the precompile bench, where merge-base lacks the tracer). Print a + # head-only table so the data is still visible in the PR comment. + print(format_head_only_table(head, label)) + sys.exit(0) + rows = compare(base, head) + if not rows: + sys.exit(0) + print(format_table(rows, label)) + + +def format_head_only_table(head, label=""): + if not head: + return "" + summary = "Precompile gas/native worst-case (head only — base lacks instrumentation)" + if label: + summary += f" ({label})" + lines = [ + f"
{summary}", + "", + "| Precompile | Address | Count | Max Gas | Med Gas | Max Native | Med Native |", + "|------------|---------|-------|---------|---------|------------|------------|", + ] + rows = sorted(head.items(), key=lambda kv: kv[1].get("max_native", 0), reverse=True) + for name, h in rows: + lines.append( + f"| `{name}` | `{h.get('address', '')}` | {h['count']} | " + f"{h['max_gas']} | {h['med_gas']} | {h['max_native']} | {h['med_native']} |" + ) + lines.append("") + lines.append("
") + return "\n".join(lines) + + +if __name__ == "__main__": + main() diff --git a/bench_scripts/cycles_per_native_report.py b/bench_scripts/cycles_per_native_report.py new file mode 100644 index 000000000..f6a048e61 --- /dev/null +++ b/bench_scripts/cycles_per_native_report.py @@ -0,0 +1,302 @@ +#!/usr/bin/env python3 +"""Per-execution cycles/native ratios for opcodes and precompiles. + +Reads: + - per-opcode tracer samples (`/.samples`, `gas,native` per line) + - per-opcode cycle samples (`/.effective.cycles` preferred, + else `.cycles`) + - per-precompile tracer samples (`/.samples`) + - per-label cycle samples (`/