Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 107 additions & 11 deletions whir/examples/pareto_frontier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,15 @@
//! 256 polynomials of size 2^(m-8), opened at one common point), and plots two
//! log-log views with the same y-axis (argument size = postcard proof bytes):
//!
//! - x = total committed oracle length (Σ codeword sizes) — an analytic,
//! noise-free proxy for prover work (LDE FFTs + Merkle hashing).
//! - x = a **modelled prover cost** (base-field multiply-equivalents): per
//! committed codeword, the encode (FFT) and Merkle-hash terms, plus WHIR's
//! open-phase sumcheck and the claim-batching term. This extends the older
//! "total committed oracle length" proxy, which priced only the commit
//! phase and so under-counted WHIR (see the cost-model constants below).
//! - x = measured prover wall-clock (commit + open).
//!
//! Each panel draws dotted iso-knob curves, shaded by value: WHIR grouped by k
//! (blues), FRI grouped by log_blowup (reds). Lower-left is better.
//! Each panel draws the full sample cloud plus one per-protocol Pareto frontier
//! (lower-left is better). The raw oracle length is still emitted to the CSV.
//!
//! # Knobs swept
//!
Expand Down Expand Up @@ -104,6 +107,22 @@ const FRI_LOG_FINAL_POLY_LEN: usize = 0;
/// lengths to base-field elements.
const EXT_DEGREE: u128 = 4;

// --- Prover-cost model constants (Steps 1-2 of the cost model). -------------
// Everything is expressed in base-field multiply-equivalents, so the FFT,
// Merkle-hash, sumcheck and batching terms can be summed into one number. The
// constants are rough order-of-magnitude values: only the *relative* shape of
// the modelled cost matters here. Calibrating them against a handful of
// measured points (model "Step 3") would make the absolute scale meaningful.

/// Cost of one extension-field multiply, in base-field multiplies (~d² with
/// schoolbook for a degree-d extension; ~9 with Karatsuba for d = 4).
const EXT_MUL_COST: f64 = 16.0;
/// Cost of one Poseidon permutation, in base-field multiplies (rough).
const PERM_COST: f64 = 200.0;
/// Degree of WHIR's eq-weighted sumcheck constraint (a constant multiplier on
/// the per-cell sumcheck work).
const SUMCHECK_DEG: f64 = 2.0;

/// Minimum FRI query count reaching `SECURITY_LEVEL` under the capacity formula
/// `log_blowup * queries + query_pow >= security_level`.
fn fri_min_queries(log_blowup: usize, query_pow_bits: usize) -> usize {
Expand Down Expand Up @@ -163,6 +182,10 @@ struct WhirBuilt<MT: Mmcs<F>, Ch> {
/// overlooked term in argument size: high folding factors make every queried
/// leaf much wider, even if they reduce the number of rounds.
query_widths: Vec<usize>,
/// Modelled prover cost (base-field multiply-equivalents): encode (FFT) +
/// Merkle hash, per committed codeword, plus the open-phase sumcheck and
/// claim-batching terms the raw oracle length omits.
model_cost: f64,
}

/// Build a WHIR rig for `(m, log_width)` with an arbitrary folding strategy and
Expand Down Expand Up @@ -231,6 +254,37 @@ where
.at_round(config.round_parameters.len()),
);

// ---- Prover-cost model (Steps 1-2): encode + hash + sumcheck + batch. ----
let mut model_cost = 0.0f64;
// Commit, initial base-field codeword: one FFT (encode) + Merkle hash.
{
let n = (1u128 << (num_variables + starting)) as f64;
model_cost += n * n.log2(); // T_encode (base field, w_mul = 1)
// Leaves = codeword / coset; clamp so an oversized coset gives one leaf.
let leaves =
(1u128 << (num_variables + starting).saturating_sub(config.folding_factor.at_round(0))) as f64;
model_cost += (n + leaves) * PERM_COST; // T_hash
}
// Commit, each per-round extension-field codeword. Unlike FRI, WHIR
// re-encodes (a fresh FFT) every round.
for r in &config.round_parameters {
let n = (1u128 << (r.num_variables + r.log_inv_rate)) as f64;
model_cost += n * n.log2() * EXT_MUL_COST; // T_encode (extension field)
let n_base = n * EXT_DEGREE as f64;
let leaves =
(1u128 << (r.num_variables + r.log_inv_rate).saturating_sub(r.folding_factor)) as f64;
model_cost += (n_base + leaves) * PERM_COST; // T_hash
}
// Open: multilinear sumcheck over the 2^m hypercube. The cube halves each
// round, so total prover work ≈ 2·2^m extension-field multiply-adds. FRI
// has no analogue of this term.
model_cost += SUMCHECK_DEG * 2.0 * (1u128 << num_variables) as f64 * EXT_MUL_COST;
// Open: batching the `2^log_width` evaluation claims into one constraint.
// The opening points share the trailing coordinates, so the eq-combination
// factorises to ~one pass over the cube; generic (non-shared) points would
// scale this with the claim count.
model_cost += (1u128 << num_variables) as f64 * EXT_MUL_COST;

let seed = BENCH_SEED
^ ((num_variables as u64) << 16)
^ ((log_width as u64) << 8)
Expand Down Expand Up @@ -268,6 +322,7 @@ where
oracle_len,
queries,
query_widths,
model_cost,
})
}

Expand Down Expand Up @@ -391,6 +446,36 @@ fn fri_oracle_len(
total
}

/// Modelled FRI prover cost (Steps 1-2), in base-field multiply-equivalents.
///
/// Unlike WHIR, FRI does a single FFT (the input-matrix LDE) and then folds for
/// free, so only the input oracle carries an encode term; the commit-phase
/// codewords are hashed but not re-encoded, and there is no sumcheck.
fn fri_cost(num_variables: usize, log_width: usize, log_blowup: usize, max_log_arity: usize) -> f64 {
let per_col_log = (num_variables - log_width + log_blowup) as f64;
let n_input = (1u128 << (num_variables + log_blowup)) as f64;

// Commit, input matrix: one (per-column) FFT over the base field + hash.
let mut model = n_input * per_col_log; // T_encode (base field)
let input_leaves = (1u128 << (num_variables - log_width + log_blowup)) as f64;
model += (n_input + input_leaves) * PERM_COST; // T_hash

// Commit phase: hash only (FFT-free folding), extension field.
let floor = log_blowup + FRI_LOG_FINAL_POLY_LEN;
let mut h = num_variables - log_width + log_blowup;
while h > floor {
let arity = max_log_arity.min(h - floor);
let n_base = EXT_DEGREE as f64 * (1u128 << h) as f64;
let leaves = (1u128 << (h - arity)) as f64;
model += (n_base + leaves) * PERM_COST; // T_hash, no T_encode
h -= arity;
}

// Batching: forming g = Σ αⁱ·fᵢ — one extension-field pass over the matrix.
model += n_input * EXT_MUL_COST; // T_batch
model
}

fn fri_build<InMmcs, ChMmcs, Ch>(
num_variables: usize,
log_width: usize,
Expand Down Expand Up @@ -864,8 +949,13 @@ struct Record {
/// This config's knob values; currently emitted only for possible future CSV/plot labels.
#[allow(dead_code)]
knobs: Vec<(&'static str, usize)>,
/// Total committed oracle length in field elements (prover-cost proxy).
/// Total committed oracle length in base-field elements (the original,
/// commit-only proxy). Kept in the CSV for reference.
oracle_len: f64,
/// Modelled prover cost (base-field multiply-equivalents): commit (encode +
/// hash) plus the open-phase sumcheck + batching terms. This is the x-axis
/// of the left panel.
model_cost: f64,
/// Argument size: postcard-serialised proof bytes.
proof_bytes: f64,
/// Measured prover wall-clock (commit + open), milliseconds.
Expand Down Expand Up @@ -962,6 +1052,7 @@ fn main() {
label: cand.name,
knobs: vec![("k", cand.group), ("start", cand.starting)],
oracle_len: built.oracle_len as f64,
model_cost: built.model_cost,
proof_bytes: proof_bytes as f64,
prove_ms: prove_ms as f64,
verify_us: verify_us as f64,
Expand Down Expand Up @@ -1005,6 +1096,7 @@ fn main() {
.expect("postcard FRI proof + openings")
.len();
let oracle_len = fri_oracle_len(m, log_width, log_blowup, max_log_arity);
let model_cost = fri_cost(m, log_width, log_blowup, max_log_arity);

let arities: Vec<usize> = proof
.query_proofs
Expand Down Expand Up @@ -1040,6 +1132,7 @@ fn main() {
label: format!("ρ⁻¹=2^{log_blowup} a=2^{max_log_arity} q={num_queries}"),
knobs: vec![("blowup", log_blowup), ("arity", max_log_arity)],
oracle_len: oracle_len as f64,
model_cost,
proof_bytes: proof_bytes as f64,
prove_ms: prove_ms as f64,
verify_us: verify_us as f64,
Expand Down Expand Up @@ -1067,14 +1160,17 @@ fn csv_escape(s: &str) -> String {

fn write_csv(records: &[Record]) {
let mut s = String::new();
s.push_str("protocol,label,oracle_len_elems,proof_bytes,prove_ms,verify_us,is_default\n");
s.push_str(
"protocol,label,oracle_len_elems,model_cost,proof_bytes,prove_ms,verify_us,is_default\n",
);
for r in records {
let _ = writeln!(
s,
"{},{},{:.0},{:.0},{:.0},{:.0},{}",
"{},{},{:.0},{:.0},{:.0},{:.0},{:.0},{}",
csv_escape(r.protocol),
csv_escape(&r.label),
r.oracle_len,
r.model_cost,
r.proof_bytes,
r.prove_ms,
r.verify_us,
Expand Down Expand Up @@ -1306,7 +1402,7 @@ fn write_svg(m: usize, log_width: usize, records: &[Record]) {
);
let _ = writeln!(
out,
r##"<text x="{:.1}" y="46" font-size="12" text-anchor="middle" fill="#666">m=2^{m}, 256 polys of 2^{}, {SECURITY_LEVEL}-bit capacity-regime, Poseidon1 — solid lines are per-protocol Pareto frontiers · ◯ = PR default · lower-left is better</text>"##,
r##"<text x="{:.1}" y="46" font-size="12" text-anchor="middle" fill="#666">m=2^{m}, 256 polys of 2^{}, {SECURITY_LEVEL}-bit capacity-regime, Poseidon1 — left x = modelled prover cost (encode+hash+sumcheck+batch), right x = measured wall-clock · solid lines = per-protocol Pareto frontiers · ◯ = PR default · lower-left is better</text>"##,
w / 2.0,
m - log_width,
);
Expand Down Expand Up @@ -1344,9 +1440,9 @@ fn write_svg(m: usize, log_width: usize, records: &[Record]) {
panel_top,
panel_w,
panel_h,
"Theoretical proxy",
"total committed oracle length, elements (log scale)",
&|r: &Record| r.oracle_len,
"Modelled prover cost",
"encode + hash + sumcheck + batch (base-mult-equiv, log scale)",
&|r: &Record| r.model_cost,
&fmt_count,
records,
);
Expand Down