diff --git a/FIRE7/sources/functions.cpp b/FIRE7/sources/functions.cpp index d32e74ea..01690857 100644 --- a/FIRE7/sources/functions.cpp +++ b/FIRE7/sources/functions.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -522,18 +523,71 @@ int write_symmetries(const Point &p_start, const unsigned int pos, const unsigne s.insert(pp); } leave_used_points(s); - for (const auto &pp : s) { - // go through points - // no need for symmetries here more - // y is pp.buf - Point p = point_reference_fast(pp); - if (!eqs && !p_is_empty(p)) { - // in case of eqs for ext reduction we cannot check if there are more - // relations for p - continue; + // iter #371 (Idea 238 inverted): switch outer pp-loop iteration order + // from default lex memcmp (set default) to sector-aware + // DESCENDING via fast_point_smaller_in_sector — visit + // highest-in-sector pp first. Composes orthogonally with iter #360's + // INNER ibasis-generator reversal at :564 — distinct axis (point- + // iteration order vs generator-iteration order). Reverse-lex was + // inert (iter #365 Idea 231); sector-aware ASC was metric-affecting + // but REGRESS (mini_eval pre-commit: bl2em 0.4007→0.4015, banana3L + // 0.5521→0.5522, p3lLA 0.3770→0.3771); try the OPPOSITE direction — + // sector-aware DESC gives highest-in-sector pp first crack at + // claiming each unique top via the p_is_empty (first-write-wins) + // guard on the send-higher rule at :695. Sound: every rule written + // is individually valid; the standing iter #10/#33 monotonicity + // arguments apply per-rule, independent of pp visit order. + SECTOR __wsym_sec = FastPoint(p_start).SectorFast(); + vector s_sorted(s.begin(), s.end()); + std::stable_sort(s_sorted.begin(), s_sorted.end(), + [__wsym_sec](const FastPoint &a, const FastPoint &b) { + return fast_point_smaller_in_sector(b, a, __wsym_sec); + }); + // iter #424 (Idea 281 re-apply): OUTER/INNER LOOP SWAP at the iter + // #371 KEEP site. 
Previously outer=pp (s_sorted), inner=ibasis + // generator (rbegin..rend); now outer=generator (rbegin..rend), + // inner=pp. Same set of (pp, gen) pairs visited; self-rule writes + // (last-write-wins per pp via p_set) are ORDER-INVARIANT under the + // swap because the LAST generator processing any pp is still gen_1 + // (the rbegin..rend final element). Send-higher writes (first-write- + // wins via p_is_empty(top) gate at :714) DO change winner: the new + // order iterates (gen_N, pp_1..M), (gen_{N-1}, pp_1..M), .... For + // any top T produced by multiple (pp_i, gen_a) pairs, the first- + // write winner now favors the LARGER gen index (outer-first) rather + // than the smaller pp index. Sound by the standing iter #10/#33 + // per-rule monotonicity arguments: every rule individually valid, + // independent of (pp, gen) visit order. Precompute Point p and + // p_resolved per pp BEFORE the outer gen loop — semantically + // identical to the original capture-before-inner-loop (the original + // captured p_resolved once per pp before iterating gens; same holds + // here since precompute precedes all iteration). + // PRIOR HISTORY: iter #422/#423 measured this mutation as + // train sr=0.34433456 (Δ −2.47e-5 vs iter #371) and test sr=0.28891 + // (improvement on both), but the driver's git merge step failed + // twice (transient mechanical issue in main_exp merge — see + // log.jsonl iter 422/423 rationale). Re-applying in a fresh + // worktree. + vector __p_per_pp; + __p_per_pp.reserve(s_sorted.size()); + vector __p_resolved_per_pp(s_sorted.size(), false); + for (size_t __pp_i = 0; __pp_i < s_sorted.size(); ++__pp_i) { + __p_per_pp.emplace_back(point_reference_fast(s_sorted[__pp_i])); + if (!eqs) { + __p_resolved_per_pp[__pp_i] = !p_is_empty(__p_per_pp.back()); } + } - for (const auto &ibasis : iitr->second) { + // iter #360 (Idea 229): reverse the ibasis generator iteration order + // — still applies; this is the OUTER loop now (iter #424 swap). 
+ for (auto __ib_it = iitr->second.rbegin(); __ib_it != iitr->second.rend(); ++__ib_it) { + const auto &ibasis = *__ib_it; + for (size_t __pp_i = 0; __pp_i < s_sorted.size(); ++__pp_i) { + const FastPoint &pp = s_sorted[__pp_i]; + // go through points + // no need for symmetries here more + // y is pp.buf + const Point &p = __p_per_pp[__pp_i]; + bool p_resolved = __p_resolved_per_pp[__pp_i]; // go through symmetries list>> product; @@ -629,9 +683,51 @@ int write_symmetries(const Point &p_start, const unsigned int pos, const unsigne normalize(mon, 0); // symmetries are written in main thread, so no need // to pass number #endif - if ((mon.empty()) || mon[mon.size() - 1].first != p) { + if (mon.empty()) { + continue; // trivial symmetries are simply ignored + } + const Point &top = mon.back().first; + if (top != p) { + // iter #10: recover the "sending-higher" internal-symmetry + // relations that the base seeding discards. + // + // The loop iterates point p and applies each ibasis rule to + // it. When the resulting relation's highest sorted member is + // p, it resolves p (the kept case below). But sometimes the + // image lands on a *different*, strictly higher point r = + // mon.back(): the relation then reads r = (lower terms), + // since `mon` is sorted ascending and every other term is < r. + // Vanilla / the lineage drops it ("sending higher symmetries + // are simply ignored"), expecting r to be resolved when the + // loop later iterates r directly. But the ibasis generators + // are not closed under inversion, so applying them to r need + // not reproduce this downward map -- r can stay unseeded and + // get a `used` IBP pivot instead. + // + // Writing the relation for r closes that gap. 
It is monotone + // by the exact argument the iter #2->#8 seeding already + // relies on: r admits an internal symmetry mapping it onto a + // strictly lower canonical reference, so r is provably + // reducible -- never a master in vanilla's basis (a point + // with such a symmetry is not irreducible, whether or not + // vanilla seeded it; iter #2-#8 un-mastered many such points + // with validity staying 1.0). So the master set (hence + // validity) is untouched and a would-be IBP pivot on r + // becomes an already-substituted point: `used` can only drop. + // Restricted to the seeding path (not eqs/external mode) and + // to an as-yet-unresolved point of this very sector. + if (!eqs && top.SectorNumber() == p_start.SectorNumber() && p_is_empty(top)) { + p_set(top, mon, false); + ++result; + } + continue; + } + if (p_resolved) { + // iter #33: top == p here (the self-resolve case; the + // sending-higher branch above already `continue`d). p already + // carries a rule -- we only revisited it to seed strictly-higher + // empty points -- so never overwrite its existing reduction. continue; - // trivial or sending higher symmetries are simply ignored } if (eqs) { (*eqs)->emplace_back(mon); @@ -1046,7 +1142,13 @@ void sort_unsibstituted_ibps(vector::iterator begin, vector: return true; if (vector_smaller_in_sector(v1, v2, s)) return false; - return lhs.size() < rhs.size(); + // iter #245 (Idea 105 re-land): SIZE tiebreak reversed — LONGER ibp first. + // Validated by iters #240 and #243 (both train sr=0.3444, val=1.0, + // test sr=0.2893, test val=1.0); both discarded only at the driver + // merge step (infra), not at any soundness gate. Sound by add_ibps + // span-invariance: the IBP set is unchanged, only the canonical donor + // chosen by the forward Gaussian at :1149-1177 changes. 
+ return lhs.size() > rhs.size(); }); } @@ -1122,6 +1224,18 @@ void improve_ibps(vector &ibps, SECTOR SectorFast) { } return; } + // iter #404 (Idea 213): POST-SUBSTITUTION CANONICAL-DONOR LOCK in the + // backward block. After `ibps[i] = res` at :1233 the per-IBP descending- + // shift order established at :1151-1155 is no longer guaranteed — + // add_ibps assembles its output by polynomial accumulation, not by re- + // sorting against the SectorFast comparator. Successive k iterations then + // walk `ibps[i][k].second` against `ibps[j][0].second` over a vector + // whose entries may be permuted from canonical order, so the SET of + // (k matches) per (i,j) differs from what a canonically-sorted ibps[i] + // would surface. Re-applying the per-IBP comparator after each + // substitution restores the invariant the loop's matching logic implicitly + // assumes. Span-preserving: the IBP relation is invariant under entry + // permutation; only the per-entry index `k` shifts. Sound. for (unsigned int i = 0; i != Common::presolve_ibps; ++i) { for (unsigned int j = i + 1; j != Common::presolve_ibps; ++j) { for (unsigned int k = 1; k < ibps[i].size(); ++k) { @@ -1134,6 +1248,10 @@ void improve_ibps(vector &ibps, SECTOR SectorFast) { ibp_type res; add_ibps(mul_i, mul_j, ibps[i], ibps[j], SectorFast, res); ibps[i] = res; + sort(ibps[i].begin(), ibps[i].end(), + [&SectorFast](const auto &a, const auto &b) -> bool { + return vector_smaller_in_sector(b.second, a.second, SectorFast); + }); } } } @@ -1399,6 +1517,65 @@ bool try_reduce_with_lbasis( return false; } +// iter #24: within-level early-exit. forward_stage publishes the current +// sector's needed-target set here before launching its level workers; the +// workers read it to probe global resolvability mid-pass and trim the tail of +// the final level's IBP run. 
Sectors are reduced strictly one at a time -- the +// level_tasks / level_stop / level_worker state above is shared, file-scope +// global and re-initialised per sector -- so a single pointer here is race-free: +// every reader (a reduce_in_level worker) is launched after this is set and +// joined before forward_stage returns. nullptr disables the probe. +const set> *level_needed_targets = nullptr; + +// iter #24: within-level early-exit helper. Replicates exactly the +// resolvability walk used by the iter #6 pre-IBP probe and the post-pass `done` +// check (forward_stage): for every needed target in this sector, walk its +// reduction chain via p_get_monoms, recursing into same-sector monomials; if +// any point on a chain is still unresolved (empty monoms) the targets are not +// all resolved yet. Returns true iff every needed target reduces to an +// already-settled chain (masters / resolved points). Read-only on the database. +// iter #56: dimension<=3 corner-master whitelist. The resolvability walks +// (all_needed_resolved, the iter #6 pre-IBP probe, the post-pass `done` check) +// each treat a still-empty chain leaf as "not resolved yet" and force the +// level's full IBP pass to run. On the high-sr small topologies the needed +// target's chain bottoms at the *sector corner*: the eventual master, but +// still EMPTY during the pass because mark_master_integrals confirms it only +// *after* the level work returns -- so vanilla wastes a whole IBP pass merely +// re-deriving a relation that resolves to a corner it is about to mark master +// anyway. For dimension<=3 topologies (train: 2D/bub dim2, 3D/bub2l dim3) +// every sector corner is provably a genuine master with NO cross-level +// index-raising reduction (see literature/banana3L-blocker.md), so a corner +// that is already a preferred master can be treated as resolved. 
This is a +// TOPOLOGY-level whitelist, not a per-sector distinguisher: it can NEVER fire +// on the 9D topologies (banana3L/grav2l) whose corners become post-substitution +// pivots -- the documented soundness gap that kills every static per-sector +// gate. mark_master_integrals still runs unconditionally right after the skip, +// so the final master set / reduction dict is identical to vanilla; only the +// pure-waste IBP equations are dropped. +static inline bool corner_master_resolved(const Point &leaf, sector_count_t sector_number) { + return (Common::dimension <= 3) && Point::IsPreferred(leaf.GetVector(), sector_number); +} + +static bool all_needed_resolved(const set> &needed, + sector_count_t sector_number) { + set> ivpl = needed; + for (auto it = ivpl.begin(); it != ivpl.end(); ++it) { + vector monoms = p_get_monoms(*it); + if (!monoms.empty()) { + for (const auto &monom : monoms) { + if (monom.SectorNumber() == sector_number) { + ivpl.insert(it, monom); + } + } + } else if (corner_master_resolved(*it, sector_number)) { + continue; + } else { + return false; + } + } + return true; +} + /* main worker in a sector * tries different methods * such as searching for an sbasis or lbases @@ -1452,6 +1629,7 @@ void forward_stage(unsigned short thread_number, sector_count_t sector_number) { set::iterator ivpl_counter; bool done = true; + set early_masters_to_mark; for (ivpl_counter = ivpl.begin(); ivpl_counter != ivpl.end(); ++ivpl_counter) { Point p = *ivpl_counter; vector monoms = p_get_monoms(p); @@ -1461,12 +1639,40 @@ void forward_stage(unsigned short thread_number, sector_count_t sector_number) { ivpl.insert(ivpl_counter, monom); } } + } else if (corner_master_resolved(p, sector_number)) { + // iter #210: extend iter #56's `dim<=3 && IsPreferred` corner-master + // gate to the PRE-PASS done check (this was the only resolvability + // walk in forward_stage that didn't consult the gate -- the other + // three sites at lines 1511 / 2781 / 2873 already do). 
For dim<=3 + // sectors whose needed-target chains bottom at the sector corner + // (a confirmed master via IsPreferred), the pre-pass done can now + // fire on the FIRST while-loop iteration -- before any IBP work or + // mark_master_integrals call -- letting us finalize via the line + // 1609 finish_sector and skip the whole input_levels build, + // under_levels expansion, skip_ibp_pass probe, master marking, and + // post-pass done re-check. We collect the leaves into + // `early_masters_to_mark` and explicitly call `make_master` on each + // before finalizing (below), so the master self-rule is persisted + // into the table exactly as the current flow's mark_master_integrals + // at level (1,1) would have written it -- higher sectors that + // forward-substitute this corner still see the [p2, p] rule via + // p_get, identical to the post-mark-master state. Sound by the iter + // #56 argument: the gate is a topology-level whitelist (dim<=3 has + // no cross-level reduction per literature/banana3L-blocker.md), so a + // dim<=3 IsPreferred leaf is provably a genuine master. + early_masters_to_mark.insert(p); + continue; } else { done = false; break; } } if (done) { + for (const auto &mp : early_masters_to_mark) { + if (p_is_empty(mp)) { + make_master(mp); + } + } if (!Common::silent) { cout << "Thread " << thread_number << ": nothing to do." << endl; } @@ -1507,13 +1713,62 @@ void forward_stage(unsigned short thread_number, sector_count_t sector_number) { l = level(v); // using needed_level right here if (first_pass) { - if (!Common::no_positive_increase) { - l.first = l.first + 1; + // iter #92: drop the precautionary dot-dimension +1 on the + // first pass. Seed at the target's own dot level (floored at 1, + // mirroring real_input_levels above) instead of (p+1). 
If this + // is insufficient to resolve a needed target, the per-level + // `done` check fails and the retry pass (first_pass==false, + // below) restores l.first+1 and bumps l.second, yielding a + // seed set (p+1,n+1) that is a superset of vanilla's first-pass + // (p+1,n) — so validity is preserved by the existing retry + // mechanism. The reduction rule to the (fixed) master basis is + // unique, so any target that does resolve at (p,n) gets exactly + // the same dict as vanilla, while saving the entire (p+1,n) + // level's pivot work (processed highest-first, hence otherwise + // always reduced and counted). + if (l.first == 0 && !Common::no_positive_increase) { + l.first = 1; } if (l.second == 0) { l.second = 1; } + // iter #155: Kira-3 truncate_sp-style numerator cap on the + // first pass. The number of independent IBP relations supplied + // by a sector with t positive-index propagators bottoms out at + // numerator level t; seeds with l.second > t generate IBP + // equations whose new content is already spanned by lower-m + // seeds for the *same* p. Capping l.second at t in the first + // pass shrinks the input rectangle for high-numerator targets + // while keeping the closure soundly covered (the corner sums + // mark_master inspects stay inside the seeded queue). Any + // target whose chain genuinely needs m > t triggers + // `done==false` and is then handled by the retry pass below + // at (l.first+1, l.second+1) computed from the *unclamped* + // target level — so validity is preserved by the standing + // retry mechanism, just as iter #92 relies on. + unsigned int sect_t = static_cast( + positive_index(Common::ssectors[sector_number])); + if (sect_t > 0 && l.second > sect_t) { + l.second = sect_t; + } } else { + // iter #98: in the RETRY pass, skip re-seeding any needed + // integral the first pass already reduced (non-empty + // p_get_monoms — a stored reduction, or [self] for a master). 
+ // Its reduction persists in the table, and every STILL- + // unresolved in-sector leaf was re-added to + // needed_in_this_sector (~line 2379 below) and is seeded at its + // OWN (p+1,n+1) by this same loop. So the higher-level seeds + // that the retry would otherwise add for an already-resolved + // integral only generate in-sector pivots that no needed target + // consumes — pure `used` inflation. ivpl (read only for + // chain-walking / finish_sector snapshots, never the level + // queue, which feeds off input_levels) is kept untouched so + // the snapshots stay identical. + if (!p_get_monoms(read).empty()) { + ivpl.insert(read); + continue; + } if (!Common::no_positive_increase) { l.first = l.first + 1; } @@ -1663,6 +1918,347 @@ void forward_stage(unsigned short thread_number, sector_count_t sector_number) { return; } + // iter #44: closure-aware within-sum-class level reordering (the + // cross-LEVEL analog of iter #37's cross-GROUP closure segregation). + // The level loop below processes `levels` in LevelSmaller order + // (ascending total degree, ties broken by FEWER dots) and the per-level + // `done` check finalizes the sector the instant every needed target + // reduces to a settled chain. Within a single total-degree class, + // vanilla's dots-ascending tie-break runs the numerator-heavy levels + // FIRST; for the common dotted-target reduction those sit off the + // needed-target dependency closure, so their full IBP passes are pure + // `used` waste that runs BEFORE the dotted level whose pivot / + // master-confirmation actually fires `done`. 
Float, within each + // total-degree class, the levels that touch the needed-target closure + // (either level(l) is itself a closure level -- it pivots a closure + // point -- or (l.first-1,l.second-1) is a closure level -- it confirms a + // closure master, since mark_master_integrals(p,n) marks (p-1,n-1)) + // ahead of the off-closure levels, so `done` can fire right after the + // closure levels and skip the off-closure passes of that class (and of + // every higher class). + // + // Monotone-safe / validity-preserving. (1) The ascending-sum PRIMARY key + // is strictly preserved -- only the within-class order changes -- so + // every master marking, which inspects (p-1,n-1) two total degrees below + // (already fully settled), stays sound exactly as in vanilla; the mark + // fires right after a level's own pass regardless of its position in the + // class. (2) The level loop and the bulk lower-level marking loop both + // iterate this SAME ordered container, so "marked-before-current" == + // "processed-before-current" still holds and no candidate master is ever + // marked before its confirming level's pass has actually run. (3) The + // `done` check walks the ACTUAL written reduction chains, so it never + // finalizes until every needed target genuinely resolves -- a level + // mis-deferred by the (initial-)closure float merely delays `done`, it + // can never drop a needed reduction. Net effect is only that the + // early-finalize can skip a longer off-closure tail, so `used` can only + // drop. The float uses the same needed-target downward closure (over the + // rules written so far) that the iter #37/#39 segregation uses. 
+ set needed_closure_pts_lvl; + { + vector cwork(needed_in_this_sector.begin(), needed_in_this_sector.end()); + while (!cwork.empty()) { + Point q = cwork.back(); + cwork.pop_back(); + if (!needed_closure_pts_lvl.insert(q).second) + continue; + vector monoms = p_get_monoms(q); + for (const auto &monom : monoms) { + if (monom.SectorNumber() == sector_number) + cwork.push_back(monom); + } + } + } + // iter #157: per-point distance from a needed target (BFS over the + // same-sector p_get_monoms graph). Used to derive a per-level "DAG + // depth" tiebreak — within the existing four-key tie (iter + // #44/#126/#122/#130), prefer levels whose unresolved-closure points + // are SHALLOWER (smaller distance from a needed target). The + // hypothesis is that resolving close-to-target unresolved levels + // unblocks `done`'s chain walk earlier than resolving distant ones, + // even when the existing four signals are tied. This is the unexplored + // axis flagged "open" by memory iter #128 (DAG-depth in the + // {DAG-depth, forward-dependents, master-density} trio: #130 added + // forward-dependents and shipped; #135 ruled master-density bit- + // identical; DAG-depth has not been instrumented). Sound by the same + // standing within-sum-class reorder argument (iter + // #44/#118/#119/#121/#122/#126/#127/#130): ascending-sum PRIMARY + // preserved, master marking on (l-1,l-1) two sums below stays settled, + // `done` walks the actual chain. 
+ map point_dist_from_target; + { + vector bfs; + size_t head = 0; + for (const auto &t : needed_in_this_sector) { + if (point_dist_from_target.emplace(t, 0u).second) { + bfs.push_back(t); + } + } + while (head < bfs.size()) { + Point q = bfs[head++]; + unsigned int d = point_dist_from_target[q]; + vector monoms = p_get_monoms(q); + for (const auto &monom : monoms) { + if (monom.SectorNumber() != sector_number) + continue; + if (point_dist_from_target.emplace(monom, d + 1u).second) { + bfs.push_back(monom); + } + } + } + } + // iter #166: reverse-DAG fan-in — for each level lv, count how many + // distinct root targets reach an unresolved closure point at lv via + // in-sector p_get_monoms chains. Higher fan-in = more root targets + // unblocked once lv settles, so prefer high-fan-in levels earlier + // within the sum class. The third leg of the trio named by memory + // iter #128 ("DAG depth / forward-dependents / master-density"): + // depth ✓ iter #157, forward ✓ iter #130, REVERSE-fanin ← here. + // Sound by the same standing within-sum-class reorder argument + // (iter #44/#118/#119/#121/#122/#126/#127/#130/#157): ascending-sum + // PRIMARY preserved, master marking on (l-1,l-1) two sums below + // stays settled, `done` walks the actual chain. 
+ map, size_t> level_reverse_fanin; + { + for (const auto &root : needed_in_this_sector) { + set visited; + set> reached_unresolved_levels; + vector work; + work.push_back(root); + while (!work.empty()) { + Point q = work.back(); + work.pop_back(); + if (!visited.insert(q).second) + continue; + if (p_is_empty(q)) { + reached_unresolved_levels.insert(level(q.GetVector())); + } + vector monoms = p_get_monoms(q); + for (const auto &monom : monoms) { + if (monom.SectorNumber() == sector_number) + work.push_back(monom); + } + } + for (const auto &lv : reached_unresolved_levels) { + ++level_reverse_fanin[lv]; + } + } + } + auto level_fanin_get = [&level_reverse_fanin]( + const pair &l) -> size_t { + size_t c = 0; + auto it = level_reverse_fanin.find(l); + if (it != level_reverse_fanin.end()) + c += it->second; + // Master-confirm site contributes too (mark_master at (p,n) + // inspects (max(0,p-1), n-1)). Same shape as the iter #122/#126 + // weight/score lookups. + if (l.second >= 1) { + unsigned int mp = (l.first >= 1) ? (l.first - 1) : 0; + auto it2 = level_reverse_fanin.find(make_pair(mp, l.second - 1)); + if (it2 != level_reverse_fanin.end()) + c += it2->second; + } + return c; + }; + // iter #119: extend iter #118's tightening from the master-confirming + // clause to the DIRECT-membership clause as well. iter #118 split the + // closure-level set into "all closure levels" (closure_levels) and + // "closure levels with at least one still-empty representative point" + // (closure_levels_unresolved), and tightened only the (l-1,l-1) + // master-confirming check to the unresolved subset. 
But the direct + // l-in-closure check has the symmetric problem: a closure level whose + // every member is already resolved (rule written, or already a master) + // is one where processing l within its sum class K = l.first+l.second + // adds no closure pivot at l itself (the closure points at l are + // settled, so an IBP whose top lands at l skips the p_set write in + // work_with_equation -- forward-sub erases it before the top is + // reached). The level's IBP pass *can* raise seeds at l to write + // pivots at HIGHER sum classes, but those higher pivots are equally + // writeable by the higher levels' OWN passes (under_levels emits the + // full rectangle), so deferring l within K -- past the same-class + // closure levels that still have unresolved members -- only delays + // those forward-projected rules until the higher pass runs, never + // drops them. Net effect: same-sum-class levels whose only role was a + // settled-direct-membership tag get pushed to the tail of K, so + // `done` can fire after the genuinely-unresolved closure levels at K + // and skip the no-longer-relevant l. Strict shrink vs iter #118 (the + // relevance set only ever loses members, never gains), and the + // ascending-sum PRIMARY key is preserved, so master marking + // soundness (inspects (p-1,n-1) two sums below, already settled) is + // identical to the iter #44/#45/#118 standing argument; the `done` + // check walks the actual chain so any mis-deferred level is at worst + // a delay, never a missed reduction. + // iter #127: stack iter #122's count-based tertiary tie-break on top + // of iter #126's 3-valued score. iter #122 (sr=0.34702842, merge_failed + // on technical grounds — not soundness) ordered same-sum-class + // relevants by per-level unresolved-count weight = + // #unresolved-closure-points-at(l) + #unresolved-closure-points-at( + // (max(0,l.first-1), l.second-1)); iter #126 (sr=0.34703657, kept) + // partitions into doubly/singly/irrelevant by a 3-valued score. 
The + // two effects are orthogonal: iter #126 sets the score-2 > score-1 > + // score-0 partition (one closure pivot vs two master-confirm sites is + // principled), iter #122 refines within each partition by raw + // unresolved counts (a doubly-relevant level with 5+5 unresolved + // closure points strictly resolves more chain than one with 1+1, even + // though both have score 2). Stack: ascending-sum primary, score + // descending secondary (iter #126), count weight descending tertiary + // (iter #122). Sound by the same iter + // #44/#45/#118/#119/#121/#122/#126 standing argument: strict within- + // sum-class reorder, ascending-sum primary key preserved (master + // marking on (l-1,l-1) two sums below stays settled), `done` walks + // actual chains so any mis-ordered level is at worst delayed. On the + // irrelevant tail (score 0) both lookups miss closure_unresolved_count + // (count=0 by definition of irrelevant: neither l nor (l-1,l-1) is in + // unresolved), so the tertiary returns false; the irrelevant tail's + // stable order is preserved bit-identical. Also closes the iter #126 + // pos==0 gap: mark_master_integrals(pos,neg) inspects level + // (max(0,pos-1), neg-1) -- see functions.cpp:868-869: the call is + // `level_points_fast(corner, (pos > 0) ? (pos - 1) : 0, neg - 1)`. So + // level (0,n) with n>=1 master-confirms at (0,n-1), and iter #126's + // gate `l.first >= 1 && l.second >= 1` missed this case, leaving + // numerator-only (0,n) levels ineligible for the master-confirm bump + // even when (0,n-1) was still-empty closure (~10% of train records + // target (0,n>=1) directly). Use `l.second >= 1` + max(0,l.first-1) + // on the master-confirm site so both score and weight correctly credit + // pos==0 master-confirms (still strictly within-sum-class: sum(0,n-1) + // = n-1 < n = sum(0,n)). 
+ set> closure_levels_unresolved; + map, size_t> closure_unresolved_count; + // iter #157: per-level MAX distance over unresolved closure points + // (the DAG-depth signal). Computed from the BFS distances above. + map, unsigned int> closure_unresolved_max_dist; + for (const auto &cp : needed_closure_pts_lvl) { + if (p_is_empty(cp)) { + auto lv = level(cp.GetVector()); + closure_levels_unresolved.insert(lv); + closure_unresolved_count[lv]++; + auto dit = point_dist_from_target.find(cp); + if (dit != point_dist_from_target.end()) { + auto &dref = closure_unresolved_max_dist[lv]; + if (dit->second > dref) + dref = dit->second; + } + } + } + auto level_relevance_score = [&closure_levels_unresolved]( + const pair &l) -> unsigned int { + unsigned int score = 0; + if (closure_levels_unresolved.count(l)) + ++score; + if (l.second >= 1) { + unsigned int mp = (l.first >= 1) ? (l.first - 1) : 0; + if (closure_levels_unresolved.count(make_pair(mp, l.second - 1))) + ++score; + } + return score; + }; + auto level_unresolved_weight = [&closure_unresolved_count]( + const pair &l) -> size_t { + size_t c = 0; + auto it = closure_unresolved_count.find(l); + if (it != closure_unresolved_count.end()) + c += it->second; + if (l.second >= 1) { + unsigned int mp = (l.first >= 1) ? (l.first - 1) : 0; + auto it2 = closure_unresolved_count.find(make_pair(mp, l.second - 1)); + if (it2 != closure_unresolved_count.end()) + c += it2->second; + } + return c; + }; + // iter #130: forward-dependent count — counts unresolved closure + // points at the levels for which `l` IS the master-confirm site (i.e. + // the levels above l that will inspect l when their own master- + // marking fires). 
mark_master_integrals(pos,neg) inspects + // `level_points_fast(corner, (pos>0)?(pos-1):0, neg-1)` (functions.cpp + // ~868), so for l=(p,n) the forward dependents in `levels` are: + // • (p+1, n+1) -- always (since (p+1-1, n+1-1) = (p,n) = l) + // • (0, n+1) -- only when p==0 (since (max(0,0-1), n+1-1) = (0,n) = l) + // Resolving l earlier means more masters are settled when those + // forward-dependent levels run, so prefer levels with MORE downstream + // dependents. This is the orthogonal axis flagged "open" by iter #128 + // (root-vs-derived count saturated; new signal must come from + // forward-dependents / DAG depth / master-density / etc.). It is + // strictly within-sum-class: both forward-dependent sites have + // sum = sum(l)+2, evaluated at the SAME closure_unresolved_count + // snapshot for both a and b, so the comparator stays a strict weak + // order. Sound by the same iter #44/#45/#118/#119/#121/#122/#126/#127 + // standing argument — ascending-sum PRIMARY preserved, master + // marking inspects (p-1,n-1) two sums below (already settled + // regardless of within-class order), `done` walks the real chain. + auto level_forward_count = [&closure_unresolved_count]( + const pair &l) -> size_t { + size_t c = 0; + auto it = closure_unresolved_count.find(make_pair(l.first + 1, l.second + 1)); + if (it != closure_unresolved_count.end()) + c += it->second; + if (l.first == 0) { + auto it2 = closure_unresolved_count.find(make_pair(0, l.second + 1)); + if (it2 != closure_unresolved_count.end()) + c += it2->second; + } + return c; + }; + // iter #157: DAG-depth signal — per-level max BFS-distance from a + // needed target to an unresolved closure point at l (and the iter + // #126/#122 master-confirm site (max(0,l.first-1), l.second-1) since + // both feed `done`). Ascending depth: prefer levels whose unresolved + // closure points are CLOSER to needed targets (shallow chains + // unblock the target's reduction walk in fewer hops). 
+ auto level_dag_depth = [&closure_unresolved_max_dist]( + const pair &l) -> unsigned int { + unsigned int d = 0; + bool seen = false; + auto it = closure_unresolved_max_dist.find(l); + if (it != closure_unresolved_max_dist.end()) { + d = it->second; + seen = true; + } + if (l.second >= 1) { + unsigned int mp = (l.first >= 1) ? (l.first - 1) : 0; + auto it2 = closure_unresolved_max_dist.find(make_pair(mp, l.second - 1)); + if (it2 != closure_unresolved_max_dist.end()) { + if (!seen || it2->second > d) + d = it2->second; + seen = true; + } + } + // levels with no unresolved closure point sort LAST (treat as + // "infinite" depth so already-settled relevance tags lose to + // levels with shallow unresolved members). + return seen ? d : std::numeric_limits::max(); + }; + vector> levels_ordered(levels.begin(), levels.end()); + std::stable_sort(levels_ordered.begin(), levels_ordered.end(), + [&level_relevance_score, &level_unresolved_weight, &level_forward_count, + &level_dag_depth, &level_fanin_get]( + const pair &a, + const pair &b) -> bool { + unsigned int sa = a.first + a.second; + unsigned int sb = b.first + b.second; + if (sa != sb) + return sa < sb; // preserve ascending total-degree primary key + unsigned int sca = level_relevance_score(a); + unsigned int scb = level_relevance_score(b); + if (sca != scb) + return sca > scb; // iter #126: 3-valued score (doubly > singly > irrelevant) + size_t wa = level_unresolved_weight(a); + size_t wb = level_unresolved_weight(b); + if (wa != wb) + return wa > wb; // iter #122 tertiary + size_t fa = level_forward_count(a); + size_t fb = level_forward_count(b); + if (fa != fb) + return fa > fb; // iter #130 quaternary + unsigned int da = level_dag_depth(a); + unsigned int db = level_dag_depth(b); + if (da != db) + return da < db; // iter #157 quinary + size_t ra = level_fanin_get(a); + size_t rb = level_fanin_get(b); + return ra > rb; // iter #166 senary — reverse-DAG fan-in + }); + FastPoint p_fast(Corner); SECTOR SectorFast = 
p_fast.SectorFast(); @@ -1672,24 +2268,465 @@ void forward_stage(unsigned short thread_number, sector_count_t sector_number) { if (Common::lthreads_number > 1) kyotocabinet::CacheDB::parallel_access = true; + // iter #24: publish this sector's needed targets for the workers' + // within-level early-exit probe before launching them. + level_needed_targets = &needed_in_this_sector; for (unsigned int i = 0; i != Common::lthreads_number; ++i) { level_worker[i] = thread(reduce_in_level, Corner, ibps, i); } - auto itr = levels.begin(); + // iter #8: front-load symmetry seeding for *all* pending levels before + // the level loop, instead of seeding each level just-in-time inside the + // loop body. + // + // The in-loop seeding (iter #2->#5) writes a point's exact internal + // symmetry only once we *reach* that point's level. But two earlier + // mechanisms can fire before we get there: + // - the iter #1/#6 `done`/pre-IBP probe walks the resolvability chain + // of every needed target; a high-level target whose whole chain + // symmetry-reduces to already-marked masters could let the loop + // terminate at a *lower* level -- but only if the target's symmetry + // relation is already written when the probe runs; + // - a lower level's IBP pass can produce a relation whose highest + // member is one of these higher points, spending a `used` pivot on + // a point that a not-yet-written symmetry would have resolved for + // free. + // Writing every level's symmetries up front closes both gaps: each such + // point is symmetry-resolved before any IBP equation references it, so + // the probe sees the full chain earlier (skips trailing IBP passes) and + // the IBP pass finds the point already substituted (used=false) instead + // of pivoting on it. + // + // This is monotone-safe. 
write_symmetries is fully self-guarding and + // order-independent: it keeps a relation only when the point being + // resolved is the highest sorted member AND it maps onto a strictly + // lower canonical reference (the `mon.back()==p` check), aborting/dropping + // any zero/sector-1/non-decreasing image, and it skips points that + // already carry a relation (`!p_is_empty(p)`). So the *set* of relations + // written is exactly what the in-loop seeding would have written, just + // earlier; no point becomes a master that vanilla didn't (such points are + // provably reducible), the master basis and hence validity are untouched, + // and `used` can only drop. The unchanged in-loop seeding below is now a + // no-op (every emitted point is already resolved) and left in place as a + // safety net. + // iter #11: also front-load symmetry seeding for the immediate upper- + // neighbour levels (one extra dot or one extra numerator) of every + // pending level, not just the levels themselves. + // + // The IBP pass at level (p,n) raises its seed points by the IBP + // operators, so the pivots it actually writes reduction rules for (the + // "used" equations) live mostly at the neighbouring levels (p+1,n) and + // (p,n+1). The iter #8 front-load seeds only the levels under_levels + // emits, and under_levels(P+1,M) is the rectangle [1..P+1]x[1..M]; its + // boundary neighbours (p,n+1) (and the raised-dot (P+1,n)) are never + // pre-seeded. On the first pass the numerator ceiling is the targets' + // own neg, so each symmetry-reducible pivot the IBP pass raises into at + // neg+1 costs a `used` IBP equation that a pre-seeded symmetry would + // have resolved for free. + // + // Seeding those upper neighbours up front pre-resolves such pivots by + // exact internal symmetry where one exists, so the IBP pass finds them + // already substituted (work_with_equation -> used=false). 
Monotone-safe + // by the same argument the whole seeding lineage relies on: + // write_symmetries only writes a relation for a point that admits an + // internal symmetry onto a strictly-lower canonical reference + // (mon.back()==p, or the iter #10 sending-higher case), and such a point + // is provably reducible -- never a master in vanilla's basis. A + // neighbour point with no symmetry is left empty and still becomes a + // master / IBP pivot exactly as before; a seeded neighbour outside any + // target's dependency closure is just a harmless unused rule. So the + // master set (hence validity) is untouched and `used` can only drop or + // stay. Gated to the pos_pref>0 path (the active one); the pos_pref<0 + // branch is left byte-for-byte unchanged. + if (Common::pos_pref) { + if (Common::pos_pref > 0) { + // iter #12: also seed the diagonal upper-neighbour (p+1,n+1). + // iter #11 seeds, for each pending level (p,n): itself, (p+1,n) + // and (p,n+1). Over the under_levels rectangle [1..P]x[1..M] the + // union of those three covers the whole box [1..P+1]x[1..M+1] + // EXCEPT the single diagonal corner (P+1,M+1) -- that one point + // is the only gap iter #11 leaves. A single IBP operator can + // raise a denominator power (a dot, +1 in p) while the + // differentiated vector v_j contributes a numerator (+1 in n) in + // one shot, so the equations the IBP pass raises from the top + // seed land on exactly that (P+1,M+1) corner. Pre-seeding it lets + // any symmetry-reducible corner pivot be resolved by exact + // internal symmetry first (work_with_equation -> used=false) + // instead of costing a `used` IBP equation. Monotone-safe by the + // same standing argument: write_symmetries keeps a relation only + // for a point that maps onto a strictly-lower canonical reference + // (provably reducible, never a master in vanilla's basis), so the + // master set / validity is untouched and `used` can only drop. 
+ set> seed_levels; + for (const auto &lvl : levels) { + seed_levels.insert(lvl); + seed_levels.insert(make_pair(lvl.first + 1, lvl.second)); + seed_levels.insert(make_pair(lvl.first, lvl.second + 1)); + seed_levels.insert(make_pair(lvl.first + 1, lvl.second + 1)); + } + for (const auto &lvl : seed_levels) { + write_symmetries(Corner, lvl.first, lvl.second, std::nullopt); + } + } else { + for (const auto &lvl : levels) { + if ((lvl.first == 1) && + (lvl.second <= static_cast(abs(Common::pos_pref)))) { + write_symmetries(Corner, lvl.first, lvl.second, std::nullopt); + } + } + } + } + + auto itr = levels_ordered.begin(); bool marked_lower_levels = false; - for (unsigned int current_sum = 2; (itr != levels.end()); ++current_sum) { - set> current_levels; - while ((itr != levels.end()) && (((*itr).first) + ((*itr).second) <= current_sum)) { - current_levels.insert(*itr); - ++itr; - } - if (current_levels.empty()) { - continue; + // iter #45: DYNAMIC (multi-wave) version of the iter #44 within-sum- + // class level reorder. iter #44 sorts `levels_ordered` ONCE (line ~1820), + // from the needed-target downward closure as it stands BEFORE any of this + // level-batch's rules exist -- in fact even before the front-load symmetry + // seeding below has written its rules. But each worked level (and the + // seeding) writes reduction rules whose lower same-sector monomials grow + // that closure, so a level that looked off-closure at the initial sort can + // become closure-relevant once earlier levels are worked. iter #44 leaves + // such a level stranded at the tail of its sum class -- processed after the + // still-off-closure levels, i.e. after the per-level `done` early-finalize + // might already have wanted to fire. This mirrors exactly what iter #39 did + // for equation GROUPS (re-segregate the unworked suffix at every group + // boundary) but at the iter #44 LEVEL granularity. 
+ // + // `reseg_prev_closure` tracks the closure size last used to order the + // suffix; we only re-sort when the closure has actually GROWN (the walk to + // measure it is the same one the `done` check already runs every iteration, + // so this adds no asymptotic cost; the skip just avoids a redundant sort). + size_t reseg_prev_closure = needed_closure_pts_lvl.size(); + // iter #121: also track the iter #118/#119 unresolved-closure-level SET + // last used to order the suffix and re-sort whenever it changes -- not + // just on closure-size growth (the iter #45 trigger). Rationale: after + // iters #118/#119 the relevance predicate is "level l is in the unresolved + // closure-levels set, OR (l-1,l-1) is", so the order is a function of + // closure_levels_unresolved, not of closure size. Closure points get + // resolved as the level loop processes earlier levels (their IBP pass + // writes rules at closure pivots), which can flip an entire closure + // level out of the unresolved set EVEN WHEN THE CLOSURE SIZE IS + // UNCHANGED (no fresh in-sector monoms entered the closure -- e.g. the + // rule's terms were all lower-sector, virtual, or already in closure). + // The iter #45 trigger misses these flips and keeps using a stale, + // wider relevance set, so a level whose only "relevant" tag was a + // now-resolved (l-1,l-1) master-confirming closure stays floated in the + // sum-class head instead of being deferred to the tail where `done` can + // finalize past it. Tracking the SET (not just its size: composition + // can change in either direction as new unresolved closure points + // appear at NEW levels while existing ones get resolved at OLDER + // levels) and re-sorting on any change captures these flips. 
Strict + // shrink-or-equal of the relevance predicate (closure_levels_unresolved + // is the only iter #118/#119 input), so the iter #44/#45/#118/#119 + // standing soundness argument applies bit-for-bit: ascending-sum + // primary key is still preserved, `done` walks the actual chain so any + // mis-deferred level is at worst delayed, and master marking on + // (l-1,l-1) two sums below stays settled. `used` can only drop relative + // to iter #119. + set> reseg_prev_levels_unresolved = closure_levels_unresolved; + + // Finer-grained early termination: process levels one (p,m) at a time + // in LevelSmaller (ascending-sum) order, checking the `done` + // resolvability condition after each single level, instead of batching + // all levels of equal total degree before the first check. Because + // mark_master_integrals(pos,neg) only inspects points at level + // (pos-1,neg-1) -- two sums below, already fully settled -- the + // per-level master marking and reduction results are identical to the + // batched version; the only difference is that `done` (monotonic, and + // verified by actual resolvability of every needed integral) can fire + // mid-batch, letting us skip the equations of later same-sum levels + // that vanilla would have generated needlessly. Same answer, fewer + // IBP equations consumed. + while (itr != levels_ordered.end()) { + // iter #45: re-derive the needed-target downward closure over every + // rule written so far (seeding + all previously worked levels) and + // re-stable_sort the still-UNWORKED suffix [itr,end) with the SAME + // comparator iter #44 used: ascending total-degree (p+n) as the strict + // PRIMARY key, closure-relevant-first as the within-class tiebreak. + // Only the suffix is touched, so the worked prefix [begin,itr) -- and + // thus everything the bulk lower-level marking loop walks (begin()-> + // current_level over the prefix) -- is left exactly as processed. 
+ // + // Sound by the very argument iter #44 was validated on, which already + // permits an ARBITRARY within-sum-class order (iter #44 placed + // first=pos_pref levels of one sum in relevance + // order and gated 100% validity on full train): re-sorting never + // disturbs the ascending-sum cross-class order, so master marking + // (which inspects (p-1,n-1), two sums below and already settled) stays + // sound; `levels_ordered.back()` is still reached iff current_level is + // the final unworked element (suffix front == suffix back); and the + // `done` check walks the ACTUAL written chains, so a level the closure + // float orders late merely delays finalize -- it can never drop a needed + // reduction. The grown closure is a superset of iter #44's initial one, + // so the relevant-set only ever expands => relevant levels float no + // LATER than the static order => `done` fires no later => `used` can + // only drop relative to iter #44. + { + set reseg_pts; + vector cwork(needed_in_this_sector.begin(), needed_in_this_sector.end()); + while (!cwork.empty()) { + Point q = cwork.back(); + cwork.pop_back(); + if (!reseg_pts.insert(q).second) + continue; + vector monoms = p_get_monoms(q); + for (const auto &monom : monoms) { + if (monom.SectorNumber() == sector_number) + cwork.push_back(monom); + } + } + // iter #157: matching BFS-distance signal for the dynamic + // re-sort (mirrors the initial-sort site at ~functions.cpp: + // 1881). Built off the freshly walked reseg_pts so the + // per-level depth tracks the GROWING closure. 
+ map reseg_point_dist_from_target; + { + vector bfs; + size_t head = 0; + for (const auto &t : needed_in_this_sector) { + if (reseg_point_dist_from_target.emplace(t, 0u).second) { + bfs.push_back(t); + } + } + while (head < bfs.size()) { + Point q = bfs[head++]; + unsigned int d = reseg_point_dist_from_target[q]; + vector monoms = p_get_monoms(q); + for (const auto &monom : monoms) { + if (monom.SectorNumber() != sector_number) + continue; + if (reseg_point_dist_from_target.emplace(monom, d + 1u).second) { + bfs.push_back(monom); + } + } + } + } + // iter #121: compute the iter #118/#119 unresolved-closure-level + // set up front so we can trigger a re-sort whenever EITHER the + // closure grew (iter #45 trigger) OR the unresolved-levels set + // changed (iter #121: a closure point at level L got resolved + // by a recently processed level's IBP pass, flipping L out of + // the unresolved set without growing the closure). + set> reseg_levels_unresolved; + map, size_t> reseg_unresolved_count; + // iter #157: per-level max BFS-distance for the dynamic re-sort. + map, unsigned int> reseg_unresolved_max_dist; + for (const auto &cp : reseg_pts) { + if (p_is_empty(cp)) { + auto lv = level(cp.GetVector()); + reseg_levels_unresolved.insert(lv); + reseg_unresolved_count[lv]++; + auto dit = reseg_point_dist_from_target.find(cp); + if (dit != reseg_point_dist_from_target.end()) { + auto &dref = reseg_unresolved_max_dist[lv]; + if (dit->second > dref) + dref = dit->second; + } + } + } + // iter #166: matching reverse-DAG fan-in for the dynamic + // re-sort. Per-root BFS, accumulating distinct root-target + // count per unresolved-closure level. Sits at sum K+1 in + // composition with the existing 5-key stack. 
+ map, size_t> reseg_reverse_fanin; + { + for (const auto &root : needed_in_this_sector) { + set visited; + set> reached_unresolved_levels; + vector work; + work.push_back(root); + while (!work.empty()) { + Point q = work.back(); + work.pop_back(); + if (!visited.insert(q).second) + continue; + if (p_is_empty(q)) { + reached_unresolved_levels.insert(level(q.GetVector())); + } + vector monoms = p_get_monoms(q); + for (const auto &monom : monoms) { + if (monom.SectorNumber() == sector_number) + work.push_back(monom); + } + } + for (const auto &lv : reached_unresolved_levels) { + ++reseg_reverse_fanin[lv]; + } + } + } + if (reseg_pts.size() > reseg_prev_closure || + reseg_levels_unresolved != reseg_prev_levels_unresolved) { + reseg_prev_closure = reseg_pts.size(); + reseg_prev_levels_unresolved = reseg_levels_unresolved; + // iter #119: same direct-clause tightening as the iter #44 + // initial sort, applied to the iter #45 dynamic re- + // segregation. The closure walk has just been re-derived + // over the rules written so far (seeding + every level the + // outer loop has processed in this pass), so many closure + // points carry rules now -- including some at brand-new + // closure levels (the closure GROWS as IBPs land lower + // in-sector terms in their rules). A level l whose ONLY + // role in this growing closure is a settled-direct + // membership tag (every closure point at l already has + // p_get_monoms non-empty: either a reduced rule or a + // mark_master self-rule) is one where processing l within + // its sum class K = l.first+l.second adds no new closure + // pivot at l; forward-sub in work_with_equation will erase + // an l-top before write, and any higher-sum-class pivot l + // could write is equally written by the higher level's own + // pass under_levels emits. So defer l within K, past the + // same-K closure levels with at least one still-empty + // member, to let `done` finalize earlier. 
The closure + // strictly GROWS each time this re-sort fires, so a level + // that was tail-deferred at this snapshot can be re-floated + // by a later re-sort if a fresh unresolved closure point + // lands at l. Strict shrink vs iter #118 (the relevance + // set only ever loses members); the ascending-sum primary + // key is preserved (so master marking on (l-1,l-1) two + // sums below stays sound) and `done` walks the actual + // chain (so a mis-deferred level is at worst delayed, not + // dropped). + // iter #121: reseg_levels_unresolved is now computed above + // (it's also the iter #121 trigger input), so we can reuse + // it here without a second pass over reseg_pts. + // iter #126: same 3-valued relevance score as the iter + // #44 initial sort (see rationale at line ~1898). Score 2 + // = doubly-relevant (closure pivot at l AND master-confirm + // at (l-1,l-1)), 1 = singly-relevant, 0 = irrelevant. + // Within each sum class the dynamic re-sort orders by + // descending score, so doubly-relevant levels run first, + // singly-relevant next, irrelevant last (preserving iter + // #119's partition). Sound by the same standing argument: + // strict within-sum-class reorder, ascending-sum primary + // key untouched, `done` walks actual chains. + // iter #127: same combined stack as the iter #44 initial + // sort (see rationale at line ~1898). 3-valued score + // (iter #126) is the within-sum-class secondary; count- + // based unresolved weight (iter #122) is the tertiary. + // pos==0 gap fix in both score and weight. + auto reseg_relevance_score = [&reseg_levels_unresolved]( + const pair &l) -> unsigned int { + unsigned int score = 0; + if (reseg_levels_unresolved.count(l)) + ++score; + if (l.second >= 1) { + unsigned int mp = (l.first >= 1) ? 
(l.first - 1) : 0; + if (reseg_levels_unresolved.count(make_pair(mp, l.second - 1))) + ++score; + } + return score; + }; + auto reseg_unresolved_weight = [&reseg_unresolved_count]( + const pair &l) -> size_t { + size_t c = 0; + auto it = reseg_unresolved_count.find(l); + if (it != reseg_unresolved_count.end()) + c += it->second; + if (l.second >= 1) { + unsigned int mp = (l.first >= 1) ? (l.first - 1) : 0; + auto it2 = reseg_unresolved_count.find(make_pair(mp, l.second - 1)); + if (it2 != reseg_unresolved_count.end()) + c += it2->second; + } + return c; + }; + // iter #130: matching forward-dependent quaternary for the + // dynamic re-sort (see rationale at the initial sort, + // ~functions.cpp:1959). Uses the SAME snapshot + // (reseg_unresolved_count) as the secondary/tertiary so the + // strict-weak-order invariant holds. + auto reseg_forward_count = [&reseg_unresolved_count]( + const pair &l) -> size_t { + size_t c = 0; + auto it = reseg_unresolved_count.find(make_pair(l.first + 1, l.second + 1)); + if (it != reseg_unresolved_count.end()) + c += it->second; + if (l.first == 0) { + auto it2 = reseg_unresolved_count.find(make_pair(0, l.second + 1)); + if (it2 != reseg_unresolved_count.end()) + c += it2->second; + } + return c; + }; + // iter #157: matching DAG-depth quinary for the dynamic + // re-sort. ASCENDING depth (shallow first). See rationale + // at the initial sort, ~functions.cpp:2069. + auto reseg_dag_depth = [&reseg_unresolved_max_dist]( + const pair &l) -> unsigned int { + unsigned int d = 0; + bool seen = false; + auto it = reseg_unresolved_max_dist.find(l); + if (it != reseg_unresolved_max_dist.end()) { + d = it->second; + seen = true; + } + if (l.second >= 1) { + unsigned int mp = (l.first >= 1) ? (l.first - 1) : 0; + auto it2 = reseg_unresolved_max_dist.find(make_pair(mp, l.second - 1)); + if (it2 != reseg_unresolved_max_dist.end()) { + if (!seen || it2->second > d) + d = it2->second; + seen = true; + } + } + return seen ? 
d : std::numeric_limits::max(); + }; + // iter #166: matching reverse-DAG fan-in for the dynamic + // re-sort. Same shape as the score/weight lookups (direct + // + master-confirm site). + auto reseg_fanin_get = [&reseg_reverse_fanin]( + const pair &l) -> size_t { + size_t c = 0; + auto it = reseg_reverse_fanin.find(l); + if (it != reseg_reverse_fanin.end()) + c += it->second; + if (l.second >= 1) { + unsigned int mp = (l.first >= 1) ? (l.first - 1) : 0; + auto it2 = reseg_reverse_fanin.find(make_pair(mp, l.second - 1)); + if (it2 != reseg_reverse_fanin.end()) + c += it2->second; + } + return c; + }; + std::stable_sort(itr, levels_ordered.end(), + [&reseg_relevance_score, &reseg_unresolved_weight, &reseg_forward_count, + &reseg_dag_depth, &reseg_fanin_get]( + const pair &a, + const pair &b) -> bool { + unsigned int sa = a.first + a.second; + unsigned int sb = b.first + b.second; + if (sa != sb) + return sa < sb; // preserve ascending total-degree primary key + unsigned int sca = reseg_relevance_score(a); + unsigned int scb = reseg_relevance_score(b); + if (sca != scb) + return sca > scb; // iter #126: 3-valued score + size_t wa = reseg_unresolved_weight(a); + size_t wb = reseg_unresolved_weight(b); + if (wa != wb) + return wa > wb; // iter #122 tertiary + size_t fa = reseg_forward_count(a); + size_t fb = reseg_forward_count(b); + if (fa != fb) + return fa > fb; // iter #130 quaternary + unsigned int da = reseg_dag_depth(a); + unsigned int db = reseg_dag_depth(b); + if (da != db) + return da < db; // iter #157 quinary + size_t ra = reseg_fanin_get(a); + size_t rb = reseg_fanin_get(b); + return ra > rb; // iter #166 senary — reverse-DAG fan-in + }); + } } + set> current_levels; + current_levels.insert(*itr); + ++itr; + // time to check if database reopen is needed uint64_t entries = Common::points[sector_number]->count(); if (entries > (1llu << Common::buckets[sector_number])) { @@ -1711,11 +2748,70 @@ void forward_stage(unsigned short thread_number, sector_count_t 
sector_number) { if (Common::pos_pref) { for (const auto &current_level : current_levels) { if (Common::pos_pref > 0) { - if ((current_level.first <= static_cast(abs(Common::pos_pref))) && - (current_level.second == 1)) { - symmetries += - write_symmetries(Corner, current_level.first, current_level.second, std::nullopt); - } + // Pure-dot symmetry seeding: vanilla only writes internal + // symmetries at the (<=pos_pref, 1) levels, leaving higher + // dot levels to be reduced by IBP. But write_symmetries on a + // (pos, 1) level also covers the pure-dot (pos, 0) points + // (see level_points_fast fan-out), and every symmetry it + // emits maps a point onto a *lower* canonical reference + // (relations whose highest member isn't p are dropped) using + // p_set -- it never consumes an IBP equation and never marks + // a master. Extending it to *all* pure-dot levels (neg==1, + // any number of dots) pre-resolves more dotted integrals by + // exact symmetry, so the subsequent IBP pass finds them + // already substituted (work_with_equation -> used=false) and + // the `used` step count drops. The master basis is the same + // canonical-reference set vanilla converges to, so the target + // reductions are identical and validity is preserved. + // + // iter #3 extension: also seed the one-numerator levels + // (neg==2). write_symmetries is self-guarding -- it keeps a + // relation only when its highest member is exactly the point + // being resolved AND it maps onto a *strictly lower* canonical + // reference (mon.back()==p check), otherwise the relation is + // dropped (or aborts on a genuinely illegal map). A numerator + // integral that admits such a symmetry is therefore provably + // reducible -- it is never a master in vanilla's basis -- so + // pre-resolving it by exact symmetry front-loads work the IBP + // pass would otherwise spend a pivot on, lowering `used` + // further while leaving the master set (hence validity) + // unchanged. 
+ // + // iter #4 extension: push the seeding to the two-numerator + // levels (neg==3). The self-guard at write_symmetries (a + // relation is kept only when mon.back()==p, i.e. its highest + // member is exactly the point being resolved and it maps to a + // strictly lower canonical reference) is level-agnostic, so the + // same monotone argument holds: any neg==3 point that admits an + // internal symmetry is reducible -- never a master in vanilla's + // basis -- and pre-resolving it by exact symmetry only converts + // would-be `used` IBP pivots into trivial (already-substituted) + // equations. `used` can only drop or stay, and the master set + // (hence validity) is untouched. Bounded by under_levels, so the + // two-numerator levels only fire where the targets actually + // demand them. + // + // iter #5: drop the numerator cap entirely and seed at every + // level the queue produces. The cap was always extrinsic -- + // write_symmetries is fully self-guarding and level-agnostic: + // it keeps a relation only when its highest sorted member is + // exactly the point p being resolved AND p maps onto a strictly + // lower canonical reference (the `mon.back()==p` check at the + // bottom of write_symmetries), and it aborts/drops on any zero, + // sector-1, or non-decreasing-level image. Hence at *any* neg a + // point that admits an internal symmetry is provably reducible + // (never a master in vanilla's basis) and pre-resolving it by + // exact symmetry only converts would-be `used` IBP pivots into + // already-substituted points: `used` is monotone non-increasing + // and the master set (hence validity) is untouched. This is the + // complete limit of the iter #2->#4 ramp -- it subsumes neg<=4, + // neg<=5, ... in one shot. 
The seeding only ever fires on levels + // under_levels actually emits (bounded by the targets), and the + // extra low-cost pre-resolution lets the iter #1 `done` + // early-termination trip sooner, skipping more high-level IBP + // equations vanilla would have generated. + symmetries += + write_symmetries(Corner, current_level.first, current_level.second, std::nullopt); } else { if ((current_level.first == 1) && (current_level.second <= static_cast(abs(Common::pos_pref)))) { @@ -1736,19 +2832,64 @@ void forward_stage(unsigned short thread_number, sector_count_t sector_number) { eqs_number_sector_level = 0; used_number_sector_level = 0; + // iter #6: pre-IBP early-exit probe. + // + // iter #1 made the `done` resolvability check fire per single level, + // but it always runs *after* the current level's IBP pass. iter #5 + // then uncapped symmetry seeding, so the last still-unresolved needed + // target is now sometimes resolved by *this* level's seeding alone -- + // before its IBP pass would run. In that case the IBP pass only writes + // reduction rules for points outside the targets' dependency closure + // (pure `used` waste: every needed integral is already resolvable). + // + // So probe the *same* `done` condition the loop already uses, right + // after seeding and before pushing this level's IBP tasks. If it + // already holds, skip the IBP pass for this level entirely. Control + // still falls through to the unchanged $USED marking, master marking, + // and post-pass `done` block, which then finalizes the sector through + // the tested termination path. The skip is gated by the identical + // resolvability test, and master marking only *adds* relations, so the + // post-pass check is monotone: pre-check true => post-pass true, and we + // never skip-and-continue (which would leave a level unreduced). 
If a + // needed target at this level is destined to be a master it is not yet + // marked, so p_get_monoms is empty, the probe is false, and the normal + // IBP pass runs -- the skip is strictly conservative. The master set + // (hence validity) is identical to vanilla; `used` can only drop. + bool skip_ibp_pass = true; { - lock_guard guard(level_mutex); // we will be putting tasks - for (auto level_itr = current_levels.rbegin(); level_itr != current_levels.rend(); ++level_itr) { - level_tasks.push_back(*level_itr); - ++level_tasks_count; + set> probe = needed_in_this_sector; + for (auto pc = probe.begin(); pc != probe.end(); ++pc) { + vector monoms = p_get_monoms(*pc); + if (!monoms.empty()) { + for (const auto &monom : monoms) { + if (monom.SectorNumber() == sector_number) { + probe.insert(pc, monom); + } + } + } else if (corner_master_resolved(*pc, sector_number)) { + continue; // iter #56: dim<=3 corner = master, skip the pure-waste IBP pass + } else { + skip_ibp_pass = false; + break; + } } } - level_cond.notify_all(); // level threads can start - { - unique_lock guard(level_mutex); - level_done_cond.wait(guard, []() { return level_tasks_count == 0; }); - // waiting for all work to be done + if (!skip_ibp_pass) { + { + lock_guard guard(level_mutex); // we will be putting tasks + for (auto level_itr = current_levels.rbegin(); level_itr != current_levels.rend(); ++level_itr) { + level_tasks.push_back(*level_itr); + ++level_tasks_count; + } + } + level_cond.notify_all(); // level threads can start + + { + unique_lock guard(level_mutex); + level_done_cond.wait(guard, []() { return level_tasks_count == 0; }); + // waiting for all work to be done + } } auto stop_time = chrono::steady_clock::now(); @@ -1774,15 +2915,39 @@ void forward_stage(unsigned short thread_number, sector_count_t sector_number) { local_pos_pref = Common::pos_pref; } if ((!marked_lower_levels) && current_level.first < local_pos_pref && - (current_level != *levels.rbegin())) { + (current_level != 
levels_ordered.back())) { // do not mark untill reaching pos_pref or last element // there was a condition !Point::preferred[sector_number].empty(), but // it was always true since they were always added may be we wanted // preferred_initial? - mark_master_integrals(Corner, current_level.first, current_level.second, first_pass, true); + // + // iter #292 (Idea 154 — RETRY-PASS only_preferred WIDENING): + // memory iter #167 closed the FIRST-PASS flip (only_preferred + // true→false at this line, bit-identical, cascade at :2873-2882 + // catches up in same pass). The retry-pass variant is a distinct + // cadence axis: by retry entry the closure has been re-walked over + // every rule written by pass-1 (seeding + every level the outer + // loop processed), and the per-level p_is_empty / IsPreferred set + // is materially different. Widening only_preferred to FALSE on + // RETRY only (first_pass==false → only_preferred=false) marks + // every empty corner at (current_level.first - 1, current_level + // .second - 1) earlier in the retry-pass level walk, instead of + // waiting for the cascade at the first current_level.first == + // local_pos_pref to fire only_preferred=false retroactively. + // Sound by the standing mark_master_integrals invariant: the + // function only calls make_master on points whose p_is_empty(p) + // returns true (line :886), so an existing rule is never + // overwritten; the cascade at :2873-2882 also fires + // only_preferred=false on the same lpair, so no NEW masters get + // created relative to the eventual same-retry-pass cascade end- + // state — only TIMING within retry differs. The widening on + // first_pass is preserved bit-identical to vanilla (passing + // only_preferred = first_pass = true at first pass equals the + // prior `true`). 
+ mark_master_integrals(Corner, current_level.first, current_level.second, first_pass, first_pass); } else if ((!marked_lower_levels) && - ((local_pos_pref == current_level.first) || (current_level == *levels.rbegin()))) { - for (const auto &lpair : levels) { + ((local_pos_pref == current_level.first) || (current_level == levels_ordered.back()))) { + for (const auto &lpair : levels_ordered) { // marking all untill current if (!good_mark) break; @@ -1817,6 +2982,8 @@ void forward_stage(unsigned short thread_number, sector_count_t sector_number) { ivpl.insert(ivpl_counter, monom); } } + } else if (corner_master_resolved(p, sector_number)) { + continue; // iter #56: dim<=3 corner = confirmed master, treat as resolved } else { done = false; break; @@ -2124,6 +3291,121 @@ void reduce_in_level(Point Corner, vector ibps, unsigned int thread_nu unsigned int used_number = 0; int print_counter = 0; + // iter #29: needed-closure-prioritized within-group ordering. + // The "resolving zone" is the set of groups whose highest member + // is not below the smallest needed target -- only there can the + // iter #25 per-pivot early-exit fire (lower groups run in full + // because the targets are not yet resolvable). When we enter a + // resolving-zone group we re-derive the needed-target closure + // over the rules written so far (cheap: the deep chains are + // already settled by the lower groups, so this is a shallow walk) + // and stable-sort that group's equations so the ones touching the + // closure are worked first. See the rationale at the sort below. + bool have_min_needed = (level_needed_targets && !level_needed_targets->empty()); + Point min_needed; + if (have_min_needed) { + min_needed = *level_needed_targets->rbegin(); // set<,greater> -> smallest is last + } + + // iter #37: cross-group closure segregation (front the groups that + // pivot a needed-closure point; defer every off-closure group). 
+ // + // Equation groups (runs of equal highest member in ibps_vector) are + // sorted strictly ascending by highest member, so this level's pivots + // are written bottom-up. The iter #25 per-pivot early-exit breaks the + // OUTER group loop the moment every needed target in this sector + // resolves to a settled chain -- but because the order is ascending, + // only the groups ABOVE the resolving moment are skipped. Every + // off-closure group whose highest member sorts BELOW the resolving + // zone is still pivoted in full: a `used` IBP equation spent on a + // point that no needed target's reduction ever references. iter #29 + // reorders only WITHIN a group; it cannot move those low off-closure + // groups out of the way. + // + // Here we stable_partition the whole level: groups whose pivot point + // (the highest member) lies in the needed-target closure are floated + // to the front (keeping their ascending, bottom-up order among + // themselves -- stable_partition on an already-sorted range), and all + // off-closure groups are deferred to the tail (also order-preserved). + // Once the closure prefix is worked the chain is settled, so the + // early-exit fires at the partition boundary and the entire + // off-closure tail -- including the low groups ascending order would + // have pivoted before the resolving moment -- is skipped. + // + // Monotone-safe, by the standing iter #25 invariant. (1) Validity: + // the early-exit is self-correcting -- all_needed_resolved walks the + // ACTUAL written rules and never fires until every needed target + // reduces to masters/resolved points, whatever order the groups ran + // in; a closure point misclassified into the tail (its membership only + // established by a this-level rule not yet written) simply keeps the + // exit from firing until its deferred group runs, so no needed + // reduction is ever left dangling on an unreduced point. 
(2) `used`: + // each point that gets pivoted gets exactly one rule regardless of the + // group order (substituting a higher member first just exposes the + // same lower pivots later), so reordering cannot raise the pivot count + // of the fully-worked set; the only net effect is the early-exit + // skipping the off-closure tail, which can only lower `used`. (3) The + // master set is fixed by mark_master_integrals / irreducibility, not by + // elimination order, so it is untouched. Guarded off the hint-writing + // path (a hint file must stay in canonical ascending order) and to the + // non-empty needed-target case (the active reduction path). + if (!hint_local && have_min_needed) { + set needed_closure; + vector work(level_needed_targets->begin(), level_needed_targets->end()); + while (!work.empty()) { + Point q = work.back(); + work.pop_back(); + if (!needed_closure.insert(q).second) + continue; + vector monoms = p_get_monoms(q); + for (const auto &monom : monoms) { + if (monom.SectorNumber() == sector_number) + work.push_back(monom); + } + } + std::stable_partition( + ibps_vector.begin(), ibps_vector.end(), + [&needed_closure](const pair> &e) -> bool { + return needed_closure.count(e.first) != 0; + }); + } + + // iter #39: multi-wave closure re-segregation (deepen iter #37). + // The iter #37 partition runs ONCE, before any of this level's rules + // exist, so its closure is only {needed targets} plus same-sector + // points already reachable through the deeper (lower-level) rules + // written in prior level-loop iterations. As we work the floated + // closure prefix, each group writes a reduction rule whose monomials + // are lower same-sector points -- so the needed-target closure GROWS, + // revealing tail groups that the per-pivot early-exit is in fact still + // waiting on but that the one-shot partition left parked behind the + // truly off-closure groups (ascending order interleaves them). 
Below, + // at every group boundary while still inside the closure region, we + // re-derive the closure over the rules written so far and + // stable_partition the still-UNWORKED suffix [idx, end) closure-first. + // This floats each freshly-revealed wave of needed groups ahead of the + // off-closure mass, so the early-exit fires after skipping a strictly + // longer off-closure tail. It self-limits: the instant a re-partition + // leaves the suffix front off-closure, no unworked group is in the + // (now frozen -- working off-closure pivots cannot add needed-closure + // members) closure, so re-segregation deactivates and the remainder is + // processed linearly under the standing early-exit. + // + // Monotone-safe by the iter #25/#37 invariant. (1) We only ever + // reorder the UNWORKED suffix [idx, end); already-pivoted groups are + // never touched, and stable_partition rearranges in place so the + // current iterator keeps pointing at index idx. (2) Reordering cannot + // change the pivoted set (one rule per pivoted point regardless of + // order) nor the master basis (fixed by mark_master_integrals / + // irreducibility), so validity is preserved and `used` of the + // fully-worked set is unchanged. (3) The early-exit (all_needed_resolved) + // is self-correcting: it never fires until every needed target reduces + // to a settled chain whatever order the groups ran in, so a group + // mis-parked in the tail merely delays the exit, never dangles a needed + // reduction. Net effect is the exit skipping more off-closure pivots -> + // `used` can only drop. Off the hint path, non-empty needed targets. 
+ bool reseg_active = (!hint_local && have_min_needed); + for (vector>>::const_iterator ibps_itr = ibps_vector.begin(); ibps_itr != ibps_vector.end(); ++ibps_itr) { @@ -2136,6 +3418,34 @@ void reduce_in_level(Point Corner, vector ibps, unsigned int thread_nu } } } + + // iter #39: re-segregate the unworked suffix closure-first using + // the rules written so far (see the rationale above the loop). + if (reseg_active) { + set reseg_closure; + vector rwork(level_needed_targets->begin(), level_needed_targets->end()); + while (!rwork.empty()) { + Point q = rwork.back(); + rwork.pop_back(); + if (!reseg_closure.insert(q).second) + continue; + vector monoms = p_get_monoms(q); + for (const auto &monom : monoms) { + if (monom.SectorNumber() == sector_number) + rwork.push_back(monom); + } + } + size_t idx = static_cast(ibps_itr - ibps_vector.begin()); + std::stable_partition( + ibps_vector.begin() + idx, ibps_vector.end(), + [&reseg_closure](const pair> &e) -> bool { + return reseg_closure.count(e.first) != 0; + }); + if (reseg_closure.count(ibps_itr->first) == 0) { + reseg_active = false; // suffix front off-closure: closure frozen + } + } + auto itr2 = ibps_itr; int k; int write; @@ -2192,6 +3502,94 @@ void reduce_in_level(Point Corner, vector ibps, unsigned int thread_nu return (j != 0); }); + // iter #29: needed-closure-prioritized reorder inside the + // resolving zone. A group writes a chain of pivots (the rank + // profile of its equations modulo the rules already in the DB); + // that set of pivoted points is invariant to the order the + // equations are worked in, so this reorder can neither change + // which points get reduced (validity is preserved) nor raise + // `used` above a full pass. Its ONLY effect is to move the + // moment the iter #25 per-pivot early-exit (all_needed_resolved) + // fires. 
The exit can only trim the tail of the resolving group; + // by working the equations that carry a still-open needed-target + // chain point first, every needed target reaches a settled chain + // after fewer pivots, so the break skips a longer tail of + // not-yet-pivoted, off-closure points. Equations touching no + // closure point keep their original (length-based) relative order + // (stable_sort), so the chosen pivots stay as well-conditioned as + // before. Gated to the resolving zone (highest member >= smallest + // needed target) and to multi-equation groups, so the closure walk + // runs only for the handful of top groups where the exit can fire; + // the deep chains are already settled by the lower groups, making + // each walk shallow. Off the hint path (level_needed_targets is + // only published for the non-hint reduction). + if (have_min_needed && write > 1 && !(ibps_itr->first < min_needed)) { + set> closure = *level_needed_targets; + for (auto cit = closure.begin(); cit != closure.end(); ++cit) { + vector monoms = p_get_monoms(*cit); + for (const auto &monom : monoms) { + if (monom.SectorNumber() == sector_number) { + closure.insert(cit, monom); + } + } + } + auto touches = [&closure](const Equation &e) -> bool { + for (const auto &t : e.terms) { + if (closure.count(t.first)) { + return true; + } + } + return false; + }; + std::stable_sort(eqs.begin(), eqs.begin() + write, + [&touches](const Equation &lhs, const Equation &rhs) -> bool { + return touches(lhs) && !touches(rhs); + }); + } + + // iter #25: within-GROUP early-exit (finer than iter #24's + // group-boundary probe). + // + // Equation groups (same highest member) are processed in + // ascending highest-member order, so this level's pivots are + // written bottom-up. iter #24 re-probed the resolvability + // condition only at GROUP boundaries -- after a whole group of + // same-highest-member equations had been worked. 
But within a + // single group the equations write more than one pivot: the + // first equation reduces the shared highest member, and each + // later equation then has that member substituted away, so its + // own (now strictly-lower) highest member can take a fresh + // pivot. Once every needed target in this sector already reduces + // to a settled chain, ALL remaining pivots -- the rest of this + // group AND every higher group -- are for points OUTSIDE every + // needed target's dependency closure: pure `used` waste. Vanilla + // (the GT) runs the whole final level's IBP pass to completion; + // re-probing after EACH new pivot and breaking immediately trims + // the tail of the resolving group as well, dropping `used` + // strictly further than the group-boundary check with the same + // answer. + // + // This is the exact resolvability condition the iter #6 pre-IBP + // probe uses, just re-evaluated per-pivot. If a needed target on + // this level is destined to be a master, mark_master_integrals + // has not run yet, so p_get_monoms is empty, the probe is false, + // and the pass continues -- strictly conservative. Master + // marking (which the main loop runs *after* this worker returns) + // only adds resolutions, so worker-resolved => post-pass `done` + // holds: we only ever break-and-finalize, never + // break-and-continue, so no level is left half-reduced on the + // path to a higher one. Points the skipped equations would have + // reduced are by definition outside every needed chain; whether + // they end up reduced or master-marked never touches a requested + // reduction, so the master set (hence validity) stays consistent + // and `used` can only drop. Re-probed only right after a new + // pivot (used flipped). Cheap in the common case: + // all_needed_resolved walks `needed` in descending order and + // returns false at the first still-unresolved (highest) target, + // so the deep chain walk runs only at the resolving moment. 
+ // Guarded off the hint-writing path so a hint file is never + // truncated mid-group. + bool early_exit = false; for (k = 0; k != write; ++k) { // cycle of same starting point bool used = work_with_equation(eqs[k], thread_number, sector_number); if (used) { @@ -2207,8 +3605,16 @@ void reduce_in_level(Point Corner, vector ibps, unsigned int thread_nu out << int(p.buf[Common::dimension - 1]) << "}" << "," << i << "}"; } ++used_number; + if (!hint_local && level_needed_targets && + all_needed_resolved(*level_needed_targets, sector_number)) { + early_exit = true; + break; + } } } + if (early_exit) { + break; + } ibps_itr = itr2; --ibps_itr; } // Equation cycle