From 82d47da0b2efafec94e3fab7a1c7b34ea34eadda Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 28 May 2026 05:39:09 +0000 Subject: [PATCH] feat(nl_shade_lbc): NL-SHADE-LBC adaptive DE (CEC 2022 winner) Add NLSHADE_LBC, a direct NLSHADE_RSP subclass porting the Stanovov-Akhmedova-Semenkin (CEC 2022) refinement: **Linear Bias Change** in the success-history memory update. The standard L-SHADE / jSO / NL-SHADE-RSP Lehmer mean uses fixed exponents (s^2/s^1); NL-SHADE-LBC generalises this to Sigma(w*s^p) / Sigma(w*s^(p-m)) with the order p linearly scheduled across budget progress (p_F: 3.5 -> 1.5, p_CR: 1.0 -> 1.5, spread m_lbc = 1.5). At p=2, m=1 the formula recovers the standard L-SHADE Lehmer mean, so both regimes are reachable from the default catalog and the bandit can flip between them. NLSHADE_LBC inherits the entire NL-SHADE-RSP / jSO / L-SHADE asynchronous pipeline (per-slot pending dict, generation-by-count book-keeping, archive of replaced parents, jSO frozen anchor memory bin, weighted current-to-pbest-w/1 mutation, linear p_best schedule, asymmetric F-cap, NLPSR, RSP r1 selection, randomised adaptive archive, warm restart) and overrides only _update_memory. The CR-zero handling preserves the L-SHADE terminal sentinel rule and filters strict zeros out of the LBC sum (because s^(p-m) with p < m blows up at s = 0). +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +""" +NL-SHADE-LBC Heuristic +====================== + +NL-SHADE-LBC (Stanovov, Akhmedova & Semenkin, CEC 2022) — winner of the +CEC-2022 single-objective bound-constrained competition. It is the +direct successor to +:class:`~panobbgo.heuristics.nl_shade_rsp.NLSHADE_RSP` (CEC 2021) and +inherits its entire pipeline: the jSO-derived asynchronous DE +(per-slot pending dict, generation-by-count book-keeping, archive of +replaced parents, success-history memory with the frozen jSO anchor bin, +weighted ``current-to-pbest-w/1`` mutation, linear ``p_best`` schedule, +asymmetric F-cap, warm restart), plus NL-SHADE-RSP's Non-Linear +Population Size Reduction, Rank-based Selective Pressure on the ``r1`` +draw, and randomised adaptive archive cap. + +NL-SHADE-LBC adds **one further refinement** on top of NL-SHADE-RSP: +**Linear Bias Change** in the success-history memory update. The +standard L-SHADE / jSO / NL-SHADE-RSP Lehmer mean uses fixed exponents +(``s^2 / s^1`` — i.e. order ``p = 2`` with spread ``m = 1``). +NL-SHADE-LBC generalises this to:: + + L_{p,m}(s, w) = Σ(w_i · s_i^p) / Σ(w_i · s_i^{p − m}) + +with the *order* ``p`` **changing linearly with budget progress**:: + + p_F(r) = (1 − r) · p_F_init + r · p_F_final + p_CR(r) = (1 − r) · p_CR_init + r · p_CR_final + +where ``r = len(strategy.results) / max_eval ∈ [0, 1]``. The spread +``m`` is held constant. At ``p = 2, m = 1`` the formula recovers the +standard L-SHADE weighted Lehmer mean; with the literature-default +schedule the bias toward larger F values *shrinks* and the bias toward +larger CR values *grows* across the budget. 
+ +The defaults follow Stanovov, Akhmedova & Semenkin (2022) — derived +from the MetaBox open-source reference implementation: + +* ``p_F_init = 3.5``, ``p_F_final = 1.5`` — F bias starts high + (concentrating memory on the *largest* successful F's, encouraging + exploration) and decays to the L-SHADE-style spread by the end + (letting the small, exploitative F's that survived late dominate). +* ``p_CR_init = 1.0``, ``p_CR_final = 1.5`` — CR bias starts low (the + weighted mean stays close to the arithmetic mean of successful CR's, + preserving diversity in the crossover-rate distribution) and grows + to the same balanced point by the end. +* ``m_lbc = 1.5`` — the spread between numerator and denominator + exponents. At ``m = 1.0`` the formula collapses to the standard + Lehmer mean of order ``p``. + +When the strategy budget is unknown (no ``max_eval``), the progress +helper returns ``None`` and the schedule falls back to its **initial** +exponents — i.e. NL-SHADE-LBC behaves as if it were always at the start +of the search. This is a documented, predictable fallback and matches +the convention used by :class:`~panobbgo.heuristics.lshade.LSHADE.F_schedule`. + +Asynchronous execution +---------------------- + +Identical to NL-SHADE-RSP / jSO / L-SHADE. The only method that +changes is :meth:`_update_memory` (the Lehmer-mean computation). +Everything else — NLPSR, RSP r1 selection, the randomised adaptive +archive, the jSO frozen anchor bin and pointer-skip rule, warm +restart — is inherited unchanged. 
+ +Deviations from the full CEC-2022 paper +--------------------------------------- + +For transparency (the Panobbgo norm is literature-faithful ports): two +NL-SHADE-LBC mechanisms are intentionally **not** ported here, because +they interact with the synchronous generation model in ways the +asynchronous pipeline does not expose cleanly: + +* the *adaptive binomial / exponential crossover blend* (inherited + from NL-SHADE-RSP — see the same caveat there), and +* the *repetitive generation* bound-constraint handling (Panobbgo's + asynchronous pipeline runs through ``strategy.constraint_handler`` and + the L-SHADE midpoint-reflection repair instead). + +Both are queued as follow-ups in +``planning/SELF_IMPROVEMENT_LOOP.md``. + +CR-zero handling +---------------- + +The standard L-SHADE / jSO / NL-SHADE-RSP CR=0 terminal sentinel rule +is preserved: if every successful CR in this generation is zero, *or* +the memory bin has already been planted with the sentinel, the bin +remains terminal. The LBC Lehmer mean is applied only to the strictly +positive subset of the success CR vector, because at ``p_CR < m_lbc`` +the denominator exponent goes negative and ``0^{negative} → ∞``. + +References +---------- + +* V. Stanovov, S. Akhmedova & E. Semenkin (2022). "NL-SHADE-LBC + algorithm with linear parameter adaptation bias change for CEC 2022 + Numerical Optimization." *Proceedings of CEC 2022*. Winner of the + CEC-2022 single-objective bound-constrained competition. +* V. Stanovov, S. Akhmedova & E. Semenkin (2021). "NL-SHADE-RSP + Algorithm with Adaptive Archive and Selective Pressure for CEC 2021 + Numerical Optimization." *Proceedings of CEC 2021*. The + NL-SHADE-RSP foundation this refines. +* J. Brest, M. S. Maučec & B. Bošković (2017). "Single Objective + Real-Parameter Optimization: Algorithm jSO." *Proceedings of CEC + 2017*. The jSO foundation NL-SHADE-RSP refines. 
+""" + +from __future__ import annotations + +from typing import Optional + +import numpy as np + +from panobbgo.heuristics.jso import ( + _DEFAULT_ARCHIVE_FACTOR, + _DEFAULT_H, + _DEFAULT_NP_INIT, + _DEFAULT_NP_MIN, + _DEFAULT_P_BEST_MAX, + _DEFAULT_P_BEST_MIN, +) +from panobbgo.heuristics.lshade import _CR_TERMINAL +from panobbgo.heuristics.nl_shade_rsp import NLSHADE_RSP, _DEFAULT_K_RANK + +# Defaults from Stanovov, Akhmedova & Semenkin (2022) — also published +# in the MetaBox reference implementation +# (https://github.com/MetaEvo/MetaBox/blob/master/src/baseline/bbo/nlshadelbc.py). +_DEFAULT_P_F_INIT: float = 3.5 +_DEFAULT_P_F_FINAL: float = 1.5 +_DEFAULT_P_CR_INIT: float = 1.0 +_DEFAULT_P_CR_FINAL: float = 1.5 +_DEFAULT_M_LBC: float = 1.5 + + +class NLSHADE_LBC(NLSHADE_RSP): + """NL-SHADE-LBC: linear bias change on the SHADE memory update. + + Args: + strategy: The owning :class:`~panobbgo.core.StrategyBase`. + NP_init: Initial population size. Default ``30``. + NP_min: Minimum population size after non-linear reduction. + Default ``4``. + H: History memory size. Default ``5`` (inherits the jSO + anchor bin; must be ``>= 2``). + p_best_max: Upper bound on the linear ``p_best`` schedule. + Default ``0.25``. + p_best_min: Lower bound on the linear ``p_best`` schedule. + Default ``0.125``. + archive_factor: Multiplier for the external archive cap. + Default ``1.0``. + k_rank: Rank-based selective-pressure coefficient (NL-SHADE-RSP). + Default ``3.0``. + adaptive_archive: When ``True`` (default), resample the archive + cap per generation (NL-SHADE-RSP). + p_F_init: Initial exponent of the F Lehmer mean's numerator + (at progress ``r = 0``). Default ``3.5``. + p_F_final: Final exponent of the F Lehmer mean's numerator + (at progress ``r = 1``). Default ``1.5``. + p_CR_init: Initial exponent of the CR Lehmer mean's numerator. + Default ``1.0``. + p_CR_final: Final exponent of the CR Lehmer mean's numerator. + Default ``1.5``. 
+ m_lbc: Spread between numerator and denominator exponents of + the Lehmer mean. The denominator exponent is + ``p − m_lbc``. Default ``1.5``. Must be a finite float + ``> 0``. At ``p = 2, m = 1`` the formula recovers the + standard L-SHADE weighted Lehmer mean. + seed: Optional seed for the per-instance RNG. + name: Override the heuristic's display name. + + Notes: + - All numeric arguments are validated; bad values raise + :class:`ValueError`. + - Like every Panobbgo heuristic, all state is per-instance. + - When the strategy budget is unknown, the LBC schedule falls + back to its initial exponents. + """ + + def __init__( + self, + strategy, + NP_init: int = _DEFAULT_NP_INIT, + NP_min: int = _DEFAULT_NP_MIN, + H: int = _DEFAULT_H, + p_best_max: float = _DEFAULT_P_BEST_MAX, + p_best_min: float = _DEFAULT_P_BEST_MIN, + archive_factor: float = _DEFAULT_ARCHIVE_FACTOR, + k_rank: float = _DEFAULT_K_RANK, + adaptive_archive: bool = True, + p_F_init: float = _DEFAULT_P_F_INIT, + p_F_final: float = _DEFAULT_P_F_FINAL, + p_CR_init: float = _DEFAULT_P_CR_INIT, + p_CR_final: float = _DEFAULT_P_CR_FINAL, + m_lbc: float = _DEFAULT_M_LBC, + seed: Optional[int] = None, + name: Optional[str] = None, + ) -> None: + for label, v in ( + ("p_F_init", p_F_init), + ("p_F_final", p_F_final), + ("p_CR_init", p_CR_init), + ("p_CR_final", p_CR_final), + ("m_lbc", m_lbc), + ): + if not np.isfinite(v): + raise ValueError(f"NLSHADE_LBC: {label} must be a finite float, got {v!r}") + if m_lbc <= 0.0: + raise ValueError(f"NLSHADE_LBC: m_lbc must be > 0, got {m_lbc}") + + super().__init__( + strategy, + NP_init=NP_init, + NP_min=NP_min, + H=H, + p_best_max=p_best_max, + p_best_min=p_best_min, + archive_factor=archive_factor, + k_rank=k_rank, + adaptive_archive=adaptive_archive, + seed=seed, + name=name or "NLSHADE_LBC", + ) + self.p_F_init: float = float(p_F_init) + self.p_F_final: float = float(p_F_final) + self.p_CR_init: float = float(p_CR_init) + self.p_CR_final: float = float(p_CR_final) 
+ self.m_lbc: float = float(m_lbc) + + # ------------------------------------------------------------------ + # Internal helpers + # ------------------------------------------------------------------ + + def _lbc_exponent(self, p_init: float, p_final: float) -> float: + """Linear bias change schedule. + + ``p(r) = (1 − r) · p_init + r · p_final``. When the strategy + budget is unknown (``_progress() is None``) the schedule falls + back to ``p_init`` — a documented, predictable fallback. + """ + progress = self._progress() + if progress is None: + return p_init + r = float(np.clip(progress, 0.0, 1.0)) + return (1.0 - r) * p_init + r * p_final + + # ------------------------------------------------------------------ + # Overrides + # ------------------------------------------------------------------ + + def _update_memory(self) -> None: + """Apply the LBC generalized Lehmer mean for one generation. + + Identical to :meth:`JSO._update_memory` except the fixed + ``s^2 / s^1`` exponents are replaced by the LBC schedule + ``s^p(r) / s^(p(r) − m_lbc)``. The jSO anchor-bin skip and + pointer-modulo logic (write range ``[0, H − 2]``, advance + ``% (H − 1)``) is preserved by writing through ``write_idx`` + rather than ``self._mem_ptr`` directly. + """ + if not self._success_F: + return + if self.H < 2: # defensive — constructor enforces H >= 2 + return + + F_arr = np.asarray(self._success_F, dtype=float) + CR_arr = np.asarray(self._success_CR, dtype=float) + delta_arr = np.asarray(self._success_delta, dtype=float) + total = float(delta_arr.sum()) + if total > 0.0: + w = delta_arr / total + else: + w = np.full_like(delta_arr, 1.0 / len(delta_arr)) + + # jSO-style anchor-bin skip: only bins [0, H-2] are writable. + write_idx = self._mem_ptr + if write_idx >= self.H - 1: + write_idx = 0 # defensive — should be impossible by construction + + # F memory: LBC Lehmer mean. F > 0 is guaranteed by the Cauchy + # redraw logic, so no zero-handling is required. 
+ p_F = self._lbc_exponent(self.p_F_init, self.p_F_final) + F_num = float(np.sum(w * F_arr**p_F)) + F_den = float(np.sum(w * F_arr ** (p_F - self.m_lbc))) + if F_den > 0.0: + self._M_F[write_idx] = float(np.clip(F_num / F_den, 0.0, 1.0)) + + # CR memory: terminal-sentinel rule preserved; LBC Lehmer mean on + # the strictly-positive subset (so ``CR^{negative}`` is never + # evaluated at zero). + cr_max = float(CR_arr.max()) + if cr_max <= 0.0 or self._M_CR[write_idx] < 0.0: + self._M_CR[write_idx] = _CR_TERMINAL + else: + positive = CR_arr > 0.0 + w_pos = w[positive] + CR_pos = CR_arr[positive] + w_sum = float(w_pos.sum()) + if w_sum > 0.0: + w_pos = w_pos / w_sum + p_CR = self._lbc_exponent(self.p_CR_init, self.p_CR_final) + CR_num = float(np.sum(w_pos * CR_pos**p_CR)) + CR_den = float(np.sum(w_pos * CR_pos ** (p_CR - self.m_lbc))) + if CR_den > 0.0: + self._M_CR[write_idx] = float(np.clip(CR_num / CR_den, 0.0, 1.0)) + + # Advance pointer over writable range only. + self._mem_ptr = (write_idx + 1) % (self.H - 1) diff --git a/panobbgo/self_improve.py b/panobbgo/self_improve.py index ff78dae..9025d43 100644 --- a/panobbgo/self_improve.py +++ b/panobbgo/self_improve.py @@ -1353,6 +1353,83 @@ def default_catalog() -> MutationCatalog: choices=(True, False), probability=0.3, ), + # NL-SHADE-LBC initial population size — same ``[10, 60]`` + # bracket as the L-SHADE / jSO / NL-SHADE-RSP family. + MutationRule( + strategy_pattern="", + class_name="NLSHADE_LBC", + param_name="NP_init", + kind="integer_add", + bounds=(10, 60), + delta_choices=(-10, -5, 5, 10), + probability=0.5, + ), + # NL-SHADE-LBC F-memory initial Lehmer exponent. Literature + # default ``3.5``; bracket ``[1.5, 5.0]`` so the loop can probe + # weaker (closer to the L-SHADE-style bias) or stronger + # (heavily weighting the largest successful F's at the start + # of the search) initial bias. With an unknown strategy budget + # the schedule stays at ``p_F_init`` for the whole run. 
+ MutationRule( + strategy_pattern="", + class_name="NLSHADE_LBC", + param_name="p_F_init", + kind="float_uniform", + bounds=(1.5, 5.0), + low=1.5, + high=5.0, + probability=0.5, + ), + # NL-SHADE-LBC F-memory final Lehmer exponent. Literature + # default ``1.5``; bracket ``[1.0, 3.0]`` so the loop can probe + # values bracketing the standard L-SHADE order-2 exponent. + MutationRule( + strategy_pattern="", + class_name="NLSHADE_LBC", + param_name="p_F_final", + kind="float_uniform", + bounds=(1.0, 3.0), + low=1.0, + high=3.0, + probability=0.5, + ), + # NL-SHADE-LBC CR-memory schedule (initial / final). CR + # literature defaults are ``1.0 → 1.5``; bracket ``[0.5, 2.5]`` + # so the loop can probe pure-arithmetic-mean-like behaviour + # (low exponent) as well as more biased regimes. + MutationRule( + strategy_pattern="", + class_name="NLSHADE_LBC", + param_name="p_CR_init", + kind="float_uniform", + bounds=(0.5, 2.5), + low=0.5, + high=2.5, + probability=0.5, + ), + MutationRule( + strategy_pattern="", + class_name="NLSHADE_LBC", + param_name="p_CR_final", + kind="float_uniform", + bounds=(0.5, 2.5), + low=0.5, + high=2.5, + probability=0.5, + ), + # NL-SHADE-LBC Lehmer spread. Default ``1.5`` (CEC-2022); + # ``1.0`` recovers the standard L-SHADE Lehmer-mean spread. + # Bracket ``[1.0, 2.0]`` so the loop can flip between them. + MutationRule( + strategy_pattern="", + class_name="NLSHADE_LBC", + param_name="m_lbc", + kind="float_uniform", + bounds=(1.0, 2.0), + low=1.0, + high=2.0, + probability=0.5, + ), # COBYQA (Ragonneau-Zhang 2023) initial trust-region radius — # log-uniform around the literature default (0.1). 
Only fires # when a spec explicitly sets ``initial_tr_radius`` (the @@ -1434,6 +1511,7 @@ def default_structural_catalog() -> MutationCatalog: from panobbgo.heuristics.lshade import LSHADE from panobbgo.heuristics.jso import JSO from panobbgo.heuristics.nl_shade_rsp import NLSHADE_RSP + from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC from panobbgo.heuristics.cobyqa import COBYQA # Three PSO entries cover the canonical ``gbest`` (default @@ -1466,6 +1544,12 @@ def default_structural_catalog() -> MutationCatalog: # differential ``r1`` draw, and a randomised adaptive archive. Listed # as a separate candidate class from L-SHADE / jSO so the bandit can # weigh whichever DE-family arm wins on the current battery. + # NL-SHADE-LBC (Stanovov, Akhmedova & Semenkin 2022) is the CEC-2022 + # winner, a direct refinement of NL-SHADE-RSP that adds **Linear Bias + # Change** in the success-history memory update: the Lehmer-mean + # exponents for F and CR follow a budget-progress schedule. Listed + # as a separate candidate class so the bandit can weigh whichever + # DE-family arm wins on the current battery. candidates: Tuple[Tuple[type, Dict[str, Any]], ...] 
= ( (Random, {}), (Nearby, {"radius": 0.1, "axes": "all", "new": 3}), @@ -1480,6 +1564,7 @@ def default_structural_catalog() -> MutationCatalog: (LSHADE, {"NP_init": 30}), # adaptive DE w/ linear pop reduction (JSO, {"NP_init": 30}), # CEC-2017 winner, weighted current-to-pbest-w/1 (NLSHADE_RSP, {"NP_init": 30, "k_rank": 3.0}), # CEC-2021 winner, NLPSR + RSP + (NLSHADE_LBC, {"NP_init": 30, "k_rank": 3.0}), # CEC-2022 winner, NLPSR + RSP + LBC (COBYQA, {}), # Powell-family derivative-free trust-region local optimizer ) structural_rules: List[CatalogRule] = [ diff --git a/planning/SELF_IMPROVEMENT_LOOP.md b/planning/SELF_IMPROVEMENT_LOOP.md index 80d3b86..b8df037 100644 --- a/planning/SELF_IMPROVEMENT_LOOP.md +++ b/planning/SELF_IMPROVEMENT_LOOP.md @@ -633,6 +633,131 @@ This section records direct algorithmic improvements applied to Panobbgo greppable. Each entry should reference the PR / commit that landed it, the rationale, and a measured-impact number when available. +### 2026-05-28 — NL-SHADE-LBC adaptive DE (CEC 2022 winner) + +* **What** — `panobbgo/heuristics/nl_shade_lbc.py` adds the + :class:`NLSHADE_LBC` heuristic, a direct subclass of + :class:`~panobbgo.heuristics.nl_shade_rsp.NLSHADE_RSP` (CEC 2021 + winner) that ports the Stanovov-Akhmedova-Semenkin (CEC 2022) + "NL-SHADE-LBC" refinement. NL-SHADE-LBC inherits the entire + NL-SHADE-RSP / jSO / L-SHADE asynchronous pipeline (per-slot pending + dict, generation-by-count book-keeping, archive of replaced parents, + success-history memory with the frozen jSO anchor bin, weighted + ``current-to-pbest-w/1`` mutation, linear ``p_best`` schedule, + asymmetric F-cap, NLPSR, RSP r1 selection, randomised adaptive + archive, warm restart) and adds **Linear Bias Change** in the + memory update: + + The standard L-SHADE / jSO / NL-SHADE-RSP memory update uses a fixed + Lehmer mean of order 2 with spread 1 (``Σ(w·s²) / Σ(w·s)``). 
+ NL-SHADE-LBC generalises this to:: + + L_{p,m}(s, w) = Σ(w_i · s_i^p) / Σ(w_i · s_i^{p − m}) + + with the **order** ``p`` linearly scheduled across budget progress + ``r = len(strategy.results) / max_eval``:: + + p_F(r) = (1 − r) · p_F_init + r · p_F_final + p_CR(r) = (1 − r) · p_CR_init + r · p_CR_final + + Literature defaults from Stanovov et al. (2022) — verified against + the MetaBox reference implementation: ``p_F_init = 3.5``, + ``p_F_final = 1.5``, ``p_CR_init = 1.0``, ``p_CR_final = 1.5``, + ``m_lbc = 1.5``. The F-bias starts high (concentrating memory on + the *largest* successful F's, encouraging exploration) and decays; + the CR-bias starts low (preserving CR diversity) and grows. At + ``p = 2, m = 1`` the formula recovers the L-SHADE Lehmer mean — both + regimes are reachable from the default catalog so the bandit can + flip between them. + + CR-zero handling preserves the L-SHADE terminal sentinel rule and + filters strict zeros out of the LBC sum (because ``s^(p − m)`` with + ``p < m`` blows up at ``s = 0``). Registered in + :mod:`panobbgo.heuristics`; :func:`default_structural_catalog` gains + it as a fifteenth ``add_heuristic`` candidate + (``avoid_duplicates=True``); :func:`default_catalog` gains six rules + — ``NLSHADE_LBC.NP_init`` (integer_add), ``NLSHADE_LBC.p_F_init`` + (float_uniform ``[1.5, 5.0]``), ``NLSHADE_LBC.p_F_final`` + (float_uniform ``[1.0, 3.0]``), ``NLSHADE_LBC.p_CR_init`` + (float_uniform ``[0.5, 2.5]``), ``NLSHADE_LBC.p_CR_final`` + (float_uniform ``[0.5, 2.5]``), and ``NLSHADE_LBC.m_lbc`` + (float_uniform ``[1.0, 2.0]``). +* **Why** — closes the *NL-SHADE-LBC* DE-family follow-up listed under + the NL-SHADE-RSP entry above. NL-SHADE-LBC won the **CEC-2022** + single-objective bound-constrained competition and is the direct + NL-SHADE-RSP descendant; it represents the literature frontier as of + the most recent CEC competition we can mirror. 
Subclassing + NL-SHADE-RSP keeps the new heuristic at the literature frontier + while leaving NL-SHADE-RSP / jSO / L-SHADE byte-identical for + ledger reproducibility — the precedent set by the NL-SHADE-RSP entry + itself. Adds a fifth DE-family arm so the bandit can pick whichever + wins on the current battery. +* **Deviations from the full CEC-2022 paper** — for honesty (the + Panobbgo norm is literature-faithful ports): two NL-SHADE-LBC + mechanisms are intentionally **not** ported because they interact + with the synchronous generation model in ways the asynchronous + pipeline does not expose cleanly: the *adaptive binomial / + exponential crossover blend* (also intentionally not ported from + NL-SHADE-RSP — see the same caveat there), and the *repetitive + generation* bound-constraint handling (Panobbgo uses + ``strategy.constraint_handler`` and L-SHADE midpoint-reflection + repair instead). Both are queued as follow-ups below. +* **Impact** — the value of shipping this today is to give the + self-improvement loop a CEC-2022-class DE arm the bandit can select + once it has accumulated per-arm reward history. Like NL-SHADE-RSP + before it, the LBC refinements are **large-budget specialists**: at + panobbgo's small composite-battery budgets (75–500 evals) the + bias-change schedule barely warms up, so the quick-mode signal is + expected within noise. *Evidence form (per AGENTS.md "Agent-driven + improve X PRs"): catalog-only addition; backwards-compatible + (composite baseline byte-identical, existing ledgers stay valid); + queued for nightly loop validation via the structural catalog.* +* **Backwards compatibility** — strictly safe. NLSHADE_LBC is opt-in: + it is not added to any default :func:`_make_quick_strategies` / + :func:`_make_standard_strategies` / :func:`_make_full_strategies` + spec, so the composite baseline on every default battery is + byte-identical and existing ledgers stay valid. 
The structural + catalog gains it as one extra ``add_heuristic`` candidate + (``avoid_duplicates=True``). The kwarg rules fire only when a spec + sets the matching kwarg explicitly. NL-SHADE-RSP / jSO / L-SHADE + are untouched — only the LBC subclass overrides + :meth:`_update_memory`; the base classes' ``_update_memory`` methods + are byte-identical, verified by a regression test that + ``NLSHADE_RSP._update_memory`` still produces the standard L-SHADE + Lehmer mean output. +* **Tests** — `tests/test_heuristic_nl_shade_lbc.py` (30 tests): + construction validation (defaults, custom kwargs, subclass invariant + spanning NLSHADE_RSP / JSO / LSHADE, invalid / inf / NaN p_F_init / + p_F_final / p_CR_init / p_CR_final / m_lbc, m_lbc=0 and m_lbc<0 + rejection, inherited NLSHADE_RSP / jSO ``H >= 2`` / ``p_best`` + ordering / ``k_rank`` rules); LBC schedule (endpoints + progress=0/progress=1, linear midpoint, clipping at progress > 1, + fallback to p_init when budget unknown); memory update (no write to + the anchor bin H-1, pointer advances ``% (H-1)``, no-op on empty + buffer, F memory clamped to [0,1], LBC formula at progress=0 with + custom exponents matches Σ(w·F^3.5)/Σ(w·F^2.0), p=2/m=1 recovers the + standard L-SHADE Lehmer mean for *both* F and CR, CR=0 plants the + terminal sentinel, terminal-bin stays terminal, mixed-zero CR values + filtered before LBC computation, zero-delta successes fall back to + uniform weights); pipeline (on_start emits NP_init, smoke + convergence on a quadratic with no negative global progress, restart + resets archive and pending); inheritance safety (NLSHADE_RSP + ``_update_memory`` still produces standard L-SHADE mean); and + registration (package re-export + ``__all__``, structural catalog + membership, six kwarg catalog dials). +* **Documentation updated** + - `planning/SELF_IMPROVEMENT_LOOP.md`: this §13 entry; the + *NL-SHADE-LBC* next-iteration idea promoted to "shipped". 
+ - `doc/source/heuristics.rst`: new ``NLSHADE_LBC`` bullet; the + DE-family complementarity bullet now names all five arms. + - `doc/source/guide_architecture.rst`: new ``NLSHADE_LBC`` + description after NLSHADE_RSP. + - `doc/source/guide_benchmarking.rst`: structural-catalog candidate + pool lists ``NLSHADE_LBC``; the DE-family complementarity blurb + extends to five arms. + - `doc/source/guide.rst`: quick-nav entry mentions NL-SHADE-LBC and + the Linear Bias Change mechanism. + ### 2026-05-26 — Loop deduplication guard (in-flight PR awareness) * **What** — Added §12.3 step 0 and a callout at the head of "Next @@ -2480,13 +2605,15 @@ motivate the work: (Zhang-Sanderson 2009) uses a slightly different rule that weights archive entries by recency; this could be a small per-step refinement. -- **L-SHADE-RSP / NL-SHADE-RSP follow-on variants** — NL-SHADE-RSP - (CEC 2021 winner) shipped 2026-05-25 as +- **L-SHADE-RSP / NL-SHADE-RSP / NL-SHADE-LBC follow-on variants** — + NL-SHADE-RSP (CEC 2021 winner) shipped 2026-05-25 as :class:`~panobbgo.heuristics.nl_shade_rsp.NLSHADE_RSP` (rank-based selective pressure, non-linear population reduction, randomised - adaptive archive); see the §13 entry. The remaining successor, - NL-SHADE-LBC (CEC 2022), is queued under the *NL-SHADE-RSP - heuristic* next-iteration idea below. + adaptive archive); see the §13 entry. NL-SHADE-LBC (CEC 2022 + winner) shipped 2026-05-28 as + :class:`~panobbgo.heuristics.nl_shade_lbc.NLSHADE_LBC` (Linear Bias + Change in the success-history Lehmer-mean memory update); see the + §13 entry above. - **iLSHADE / jSO adaptive p_best schedule** — shipped 2026-05-19 as the opt-in ``LSHADE.p_best_end`` kwarg plus the :meth:`LSHADE._current_p_best` helper. See the §13 entry. @@ -2703,10 +2830,14 @@ motivate the work: trial used) that the current ``_TrialMeta`` does not carry; adding two optional fields to ``_TrialMeta`` and the matching success accounting in ``on_new_results`` is the clean shape. 
-* **NL-SHADE-LBC** (CEC 2022 winner) — the successor that adds a - *linear bias-correction* mechanism on top of NL-SHADE-RSP. - Subclassing :class:`NLSHADE_RSP` is the obvious shape now that the - RSP / NLPSR / archive hooks exist. +* **NL-SHADE-LBC** (CEC 2022 winner) — **shipped 2026-05-28** as + :class:`~panobbgo.heuristics.nl_shade_lbc.NLSHADE_LBC`, a direct + :class:`NLSHADE_RSP` subclass that adds Linear Bias Change in the + F / CR Lehmer-mean memory update: the order ``p`` is linearly + scheduled across budget progress instead of fixed at ``2`` (defaults + ``p_F: 3.5 → 1.5``, ``p_CR: 1.0 → 1.5``, spread ``m_lbc = 1.5``). + At ``p = 2, m = 1`` the formula recovers the standard L-SHADE + Lehmer mean. See the §13 entry. * **Categorical ``k_rank`` regimes** — the ``NLSHADE_RSP.k_rank`` rule is currently ``float_uniform [1, 5]``. A ``categorical_choice`` over the literature-canonical settings (``0`` = uniform, ``3`` = @@ -2714,6 +2845,41 @@ motivate the work: selective-pressure regime discretely, the same way ``LSHADE.archive_factor`` flips archive on / off / RSP. +#### NL-SHADE-LBC follow-ups (after 2026-05-28 ship) + +NL-SHADE-LBC shipped 2026-05-28 as +:class:`~panobbgo.heuristics.nl_shade_lbc.NLSHADE_LBC`; see the §13 +entry above. Natural extensions when the loop has collected enough +evidence to motivate the work: + +* **Categorical LBC regimes** — the four LBC schedule kwargs + (``p_F_init``, ``p_F_final``, ``p_CR_init``, ``p_CR_final``) and the + spread ``m_lbc`` are exposed as ``float_uniform`` rules today. 
A + set of literature-canonical *named* regimes — ``"cec2022"`` (the + Stanovov defaults 3.5/1.5/1.0/1.5/1.5), ``"lshade"`` + (2/2/2/2/1 — recovers standard L-SHADE), ``"flat"`` + (1/1/1/1/1 — pure weighted arithmetic mean), ``"aggressive"`` + (5/3/3/5/1.5 — strongly biased throughout) — wrapped as one + ``categorical_choice`` per slot would give the bandit a discrete + arm to flip the bias regime cleanly, the same way + ``LSHADE.F_schedule`` flips the jSO F-cap on / off. Implementation + shape: a single composite kwarg ``lbc_regime`` whose setter applies + the named tuple to the five fields, plus the categorical rule. +* **Per-CR / per-F sub-regime A/B** — the literature defaults flow + F-bias from high to low while CR-bias does the opposite. The + motivation in the paper is qualitative; nightly evidence may reveal + problem classes where *both* should decrease (or both increase). A + measured A/B at ``--standard`` mode with the bandit constrained to + the LBC arm would identify whether the paper's asymmetric schedule + generalises beyond the CEC battery. +* **Adaptive bias bounds from the success history** — instead of + using the static linear schedule, infer the schedule from the + observed variance of successful F / CR values. When the success + variance is low (memory is converging), more bias is helpful; + when high (exploration still useful), less bias. Speculative — + the paper's static schedule is well-tuned; a learned schedule + would need to clearly beat it on cross-problem averages. + #### Run a measured A/B across PSO topologies (gbest / lbest / vonneumann) Von Neumann shipped 2026-05-22 (see §13). 
# -*- coding: utf8 -*-
# Copyright 2012 -- 2026 Harald Schilly
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for the NL-SHADE-LBC (Stanovov et al. 2022) adaptive DE heuristic."""

from __future__ import annotations

import numpy as np
import pytest

from panobbgo.utils import PanobbgoTestCase


class _MockStrategyMixin:
    """Same scaffolding as the NL-SHADE-RSP / jSO / L-SHADE tests.

    NL-SHADE-LBC inherits the entire NL-SHADE-RSP pipeline, so the mock
    strategy needs the same constraint-handler / max_eval setup.
    ``config.max_eval`` is saved / restored to prevent cross-test bleed.
    """

    def setUp(self):
        """Attach a constraint handler, pin ``max_eval`` to 1000, clear results."""
        super().setUp()
        from panobbgo.lib.constraints import DefaultConstraintHandler

        self.strategy.constraint_handler = DefaultConstraintHandler(self.strategy)
        # Remember the configured budget so tearDown can put it back.
        self._orig_max_eval = self.strategy.config.max_eval
        self.strategy.config.max_eval = 1000
        self.strategy.results = []

    def tearDown(self):
        """Restore the ``max_eval`` value that was saved in ``setUp``."""
        self.strategy.config.max_eval = self._orig_max_eval
        super().tearDown()


def _build_result(strategy, x, fx, who):
    """Build a ``Result`` for point ``x`` with value ``fx`` attributed to ``who``.

    ``strategy`` is accepted but not used by this helper.
    """
    from panobbgo.lib import Point, Result

    return Result(Point(np.asarray(x, dtype=float), who), float(fx))


# ----------------------------------------------------------------------
# Construction-time validation
# ----------------------------------------------------------------------


class NLSHADELBCConstructionTests(_MockStrategyMixin, PanobbgoTestCase):
    """Constructor defaults, custom kwargs and argument validation."""

    def test_default_construction(self):
        """Default construction exposes the expected hyper-parameter values."""
        from panobbgo.heuristics.nl_shade_lbc import (
            NLSHADE_LBC,
            _DEFAULT_M_LBC,
            _DEFAULT_P_CR_FINAL,
            _DEFAULT_P_CR_INIT,
            _DEFAULT_P_F_FINAL,
            _DEFAULT_P_F_INIT,
        )

        h = NLSHADE_LBC(self.strategy)
        assert h.NP_init == 30
        assert h.NP_min == 4
        assert h.H == 5
        assert h.p_best_max == 0.25
        assert h.p_best_min == 0.125
        assert h.archive_factor == 1.0
        assert h.k_rank == 3.0
        assert h.adaptive_archive is True
        assert h.p_F_init == _DEFAULT_P_F_INIT == 3.5
        assert h.p_F_final == _DEFAULT_P_F_FINAL == 1.5
        assert h.p_CR_init == _DEFAULT_P_CR_INIT == 1.0
        assert h.p_CR_final == _DEFAULT_P_CR_FINAL == 1.5
        assert h.m_lbc == _DEFAULT_M_LBC == 1.5
        assert h.name == "NLSHADE_LBC"
        # F-cap inherited from jSO via NL-SHADE-RSP.
        assert h.F_schedule is True

    def test_custom_construction(self):
        """Explicit keyword arguments are stored on the instance."""
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC

        h = NLSHADE_LBC(
            self.strategy,
            NP_init=24,
            NP_min=6,
            H=4,
            p_best_max=0.3,
            p_best_min=0.1,
            archive_factor=2.0,
            k_rank=2.0,
            adaptive_archive=False,
            p_F_init=4.0,
            p_F_final=2.0,
            p_CR_init=0.8,
            p_CR_final=1.8,
            m_lbc=1.2,
            seed=11,
            name="MyLBC",
        )
        assert h.NP_init == 24
        assert h.NP_min == 6
        assert h.H == 4
        assert h.p_best_max == 0.3
        assert h.k_rank == 2.0
        assert h.adaptive_archive is False
        assert h.p_F_init == 4.0
        assert h.p_F_final == 2.0
        assert h.p_CR_init == 0.8
        assert h.p_CR_final == 1.8
        assert h.m_lbc == 1.2
        assert h.name == "MyLBC"

    def test_subclass_of_nl_shade_rsp_jso_lshade(self):
        """An instance is also an NLSHADE_RSP, a JSO and an LSHADE."""
        from panobbgo.heuristics.jso import JSO
        from panobbgo.heuristics.lshade import LSHADE
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC
        from panobbgo.heuristics.nl_shade_rsp import NLSHADE_RSP

        h = NLSHADE_LBC(self.strategy)
        assert isinstance(h, NLSHADE_RSP)
        assert isinstance(h, JSO)
        assert isinstance(h, LSHADE)

    def test_invalid_p_F_init(self):
        """NaN or infinite ``p_F_init`` raises ``ValueError``."""
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC

        with pytest.raises(ValueError, match="p_F_init"):
            NLSHADE_LBC(self.strategy, p_F_init=float("nan"))
        with pytest.raises(ValueError, match="p_F_init"):
            NLSHADE_LBC(self.strategy, p_F_init=float("inf"))

    def test_invalid_p_F_final(self):
        """NaN ``p_F_final`` raises ``ValueError``."""
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC

        with pytest.raises(ValueError, match="p_F_final"):
            NLSHADE_LBC(self.strategy, p_F_final=float("nan"))

    def test_invalid_p_CR_init(self):
        """Infinite ``p_CR_init`` raises ``ValueError``."""
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC

        with pytest.raises(ValueError, match="p_CR_init"):
            NLSHADE_LBC(self.strategy, p_CR_init=float("inf"))

    def test_invalid_p_CR_final(self):
        """Negative-infinite ``p_CR_final`` raises ``ValueError``."""
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC

        with pytest.raises(ValueError, match="p_CR_final"):
            NLSHADE_LBC(self.strategy, p_CR_final=float("-inf"))

    def test_invalid_m_lbc(self):
        """Zero, negative or NaN ``m_lbc`` raises ``ValueError``."""
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC

        with pytest.raises(ValueError, match="m_lbc"):
            NLSHADE_LBC(self.strategy, m_lbc=0.0)
        with pytest.raises(ValueError, match="m_lbc"):
            NLSHADE_LBC(self.strategy, m_lbc=-1.0)
        with pytest.raises(ValueError, match="m_lbc"):
            NLSHADE_LBC(self.strategy, m_lbc=float("nan"))

    def test_inherits_rsp_jso_validation(self):
        """NL-SHADE-RSP / jSO H >= 2 and p_best ordering rules still apply."""
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC

        with pytest.raises(ValueError, match="H must be >= 2"):
            NLSHADE_LBC(self.strategy, H=1)
        with pytest.raises(ValueError, match="p_best_min .* must be <= p_best_max"):
            NLSHADE_LBC(self.strategy, p_best_max=0.2, p_best_min=0.3)
        with pytest.raises(ValueError, match="k_rank"):
            NLSHADE_LBC(self.strategy, k_rank=-0.1)


# ----------------------------------------------------------------------
# Linear bias change schedule
# ----------------------------------------------------------------------


class NLSHADELBCScheduleTests(_MockStrategyMixin, PanobbgoTestCase):
    """Behaviour of ``_lbc_exponent`` across budget progress."""

    def test_exponent_endpoints_F(self):
        """The exponent equals ``p_F_init`` at progress 0 and ``p_F_final`` at 1."""
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC

        h = NLSHADE_LBC(self.strategy)

        # progress 0 → p_F_init, progress 1 → p_F_final. Use the
        # strategy.results list to drive ``_progress()``.
        self.strategy.results = []
        self.strategy.config.max_eval = 100
        assert h._lbc_exponent(h.p_F_init, h.p_F_final) == pytest.approx(h.p_F_init)
        self.strategy.results = list(range(100))
        assert h._lbc_exponent(h.p_F_init, h.p_F_final) == pytest.approx(h.p_F_final)

    def test_exponent_linear_midpoint(self):
        """At progress 0.5 the exponent is the average of both endpoints."""
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC

        h = NLSHADE_LBC(self.strategy)
        self.strategy.config.max_eval = 100
        self.strategy.results = list(range(50))  # progress = 0.5
        mid = h._lbc_exponent(h.p_F_init, h.p_F_final)
        assert mid == pytest.approx(0.5 * h.p_F_init + 0.5 * h.p_F_final)

    def test_exponent_clipped_to_unit_interval(self):
        """More results than ``max_eval`` still yields the final exponent."""
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC

        h = NLSHADE_LBC(self.strategy)
        self.strategy.config.max_eval = 100
        # Overspending — progress should clip to 1.0.
        self.strategy.results = list(range(200))
        assert h._lbc_exponent(h.p_F_init, h.p_F_final) == pytest.approx(h.p_F_final)

    def test_exponent_fallback_when_budget_unknown(self):
        """``_progress() is None`` → schedule returns p_init."""
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC

        h = NLSHADE_LBC(self.strategy)
        self.strategy.config.max_eval = 0  # unknown budget
        assert h._lbc_exponent(h.p_F_init, h.p_F_final) == h.p_F_init
        assert h._lbc_exponent(h.p_CR_init, h.p_CR_final) == h.p_CR_init


# ----------------------------------------------------------------------
# Memory update (LBC Lehmer mean)
# ----------------------------------------------------------------------


class NLSHADELBCMemoryUpdateTests(_MockStrategyMixin, PanobbgoTestCase):
    """Behaviour of the overridden ``_update_memory``."""

    def _seed_success_buffer(self, h, F_vals, CR_vals, deltas):
        """Overwrite the heuristic's success buffers with plain float lists."""
        h._success_F = list(map(float, F_vals))
        h._success_CR = list(map(float, CR_vals))
        h._success_delta = list(map(float, deltas))

    def test_update_memory_writes_to_writable_range_only(self):
        """The jSO anchor bin (index H-1) must stay frozen at 0.9."""
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC

        h = NLSHADE_LBC(self.strategy, H=4)
        h.on_start()
        anchor_F_before = h._M_F[-1]
        anchor_CR_before = h._M_CR[-1]
        # Run several updates to make sure no write ever falls on H-1.
        for _ in range(2 * h.H):
            self._seed_success_buffer(h, [0.5, 0.7], [0.3, 0.6], [1.0, 2.0])
            h._update_memory()
        assert h._M_F[-1] == anchor_F_before
        assert h._M_CR[-1] == anchor_CR_before

    def test_update_memory_advances_pointer_modulo_writable(self):
        """``_mem_ptr`` steps by one per update and wraps after the writable bins."""
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC

        h = NLSHADE_LBC(self.strategy, H=5)
        h.on_start()
        for expected in [1, 2, 3, 0, 1, 2]:  # H-1 = 4 writable bins, wraps mod 4
            self._seed_success_buffer(h, [0.4], [0.6], [1.0])
            h._update_memory()
            assert h._mem_ptr == expected

    def test_no_op_when_buffer_empty(self):
        """Calling ``_update_memory`` without seeding leaves both memories unchanged."""
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC

        h = NLSHADE_LBC(self.strategy, H=4)
        h.on_start()
        snap_F = h._M_F.copy()
        snap_CR = h._M_CR.copy()
        h._update_memory()
        np.testing.assert_array_equal(h._M_F, snap_F)
        np.testing.assert_array_equal(h._M_CR, snap_CR)

    def test_F_memory_in_unit_interval(self):
        """After one update, bin 0 of both memories lies in ``[0, 1]``."""
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC

        h = NLSHADE_LBC(self.strategy, H=4)
        h.on_start()
        self.strategy.config.max_eval = 1000
        self.strategy.results = list(range(100))  # progress ~ 0.1
        self._seed_success_buffer(
            h,
            F_vals=[0.1, 0.5, 0.9, 0.2, 0.7],
            CR_vals=[0.2, 0.4, 0.8, 0.5, 0.6],
            deltas=[1.0, 2.0, 1.5, 0.5, 0.7],
        )
        h._update_memory()
        # The write went to bin 0 (initial _mem_ptr).
        assert 0.0 <= h._M_F[0] <= 1.0
        assert 0.0 <= h._M_CR[0] <= 1.0

    def test_F_memory_at_progress_zero_uses_p_init(self):
        """At progress=0 the LBC mean uses p=p_F_init, m=m_lbc."""
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC

        h = NLSHADE_LBC(self.strategy, H=4, p_F_init=3.5, p_F_final=1.5, m_lbc=1.5)
        h.on_start()
        self.strategy.config.max_eval = 1000
        self.strategy.results = []  # progress = 0
        F_vals = np.array([0.3, 0.5, 0.9, 0.6])
        CR_vals = np.array([0.4, 0.5, 0.8, 0.3])
        deltas = np.array([1.0, 1.0, 1.0, 1.0])
        self._seed_success_buffer(h, F_vals, CR_vals, deltas)
        h._update_memory()
        w = deltas / deltas.sum()
        # Numerator order p = 3.5, denominator order p - m = 3.5 - 1.5 = 2.0.
        expected = float(np.sum(w * F_vals**3.5) / np.sum(w * F_vals**2.0))
        assert h._M_F[0] == pytest.approx(expected, rel=1e-9)

    def test_F_memory_recovers_standard_lehmer_when_p2_m1(self):
        """At p_F=2 (both ends) and m_lbc=1, the LBC formula recovers L-SHADE."""
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC

        h = NLSHADE_LBC(
            self.strategy,
            H=4,
            p_F_init=2.0,
            p_F_final=2.0,
            p_CR_init=2.0,
            p_CR_final=2.0,
            m_lbc=1.0,
        )
        h.on_start()
        self.strategy.config.max_eval = 1000
        self.strategy.results = list(range(500))  # progress = 0.5 — schedule still p=2
        F_vals = np.array([0.4, 0.7, 0.2])
        CR_vals = np.array([0.6, 0.3, 0.9])
        deltas = np.array([1.0, 2.0, 3.0])
        self._seed_success_buffer(h, F_vals, CR_vals, deltas)
        h._update_memory()
        w = deltas / deltas.sum()
        expected_F = float(np.sum(w * F_vals * F_vals) / np.sum(w * F_vals))
        expected_CR = float(np.sum(w * CR_vals * CR_vals) / np.sum(w * CR_vals))
        assert h._M_F[0] == pytest.approx(expected_F, rel=1e-9)
        assert h._M_CR[0] == pytest.approx(expected_CR, rel=1e-9)

    def test_CR_zero_terminal_sentinel(self):
        """All-zero CR successes plant the terminal sentinel (-1)."""
        from panobbgo.heuristics.lshade import _CR_TERMINAL
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC

        h = NLSHADE_LBC(self.strategy, H=4)
        h.on_start()
        self._seed_success_buffer(h, [0.5, 0.7], [0.0, 0.0], [1.0, 1.0])
        h._update_memory()
        assert h._M_CR[0] == _CR_TERMINAL

    def test_CR_terminal_bin_stays_terminal(self):
        """Once the CR bin is the sentinel, subsequent updates leave it alone."""
        from panobbgo.heuristics.lshade import _CR_TERMINAL
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC

        h = NLSHADE_LBC(self.strategy, H=4)
        h.on_start()
        h._M_CR[0] = _CR_TERMINAL  # plant the sentinel at the writable bin
        self._seed_success_buffer(h, [0.5], [0.6], [1.0])
        h._update_memory()
        assert h._M_CR[0] == _CR_TERMINAL

    def test_CR_zero_entries_filtered_with_mixed_values(self):
        """Mixed CR values: zeros skipped, LBC applied to positive subset."""
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC

        h = NLSHADE_LBC(self.strategy, H=4, p_CR_init=1.0, p_CR_final=1.0, m_lbc=1.5)
        h.on_start()
        self.strategy.config.max_eval = 1000
        self.strategy.results = []  # progress = 0
        # 1 zero + 3 positives. CR^(1-1.5)=CR^-0.5 is undefined at 0, so
        # the zero must be filtered.
        F_vals = np.array([0.3, 0.5, 0.7, 0.6])
        CR_vals = np.array([0.0, 0.4, 0.6, 0.8])
        deltas = np.array([1.0, 1.0, 1.0, 1.0])
        self._seed_success_buffer(h, F_vals, CR_vals, deltas)
        # Should not blow up — the LBC update must filter zero CR entries.
        h._update_memory()
        assert np.isfinite(h._M_CR[0])
        assert 0.0 <= h._M_CR[0] <= 1.0
        # Cross-check: the filtered LBC formula applied to the 3 positives.
        pos = CR_vals[1:]
        w_pos = deltas[1:] / deltas[1:].sum()
        expected = float(np.sum(w_pos * pos**1.0) / np.sum(w_pos * pos**-0.5))
        assert h._M_CR[0] == pytest.approx(expected, rel=1e-9)

    def test_uniform_weights_when_delta_total_zero(self):
        """Zero-delta successes fall back to uniform weighting."""
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC

        h = NLSHADE_LBC(self.strategy, H=4, p_F_init=2.0, p_F_final=2.0, m_lbc=1.0)
        h.on_start()
        self.strategy.config.max_eval = 1000
        self.strategy.results = []
        F_vals = np.array([0.2, 0.4, 0.6])
        CR_vals = np.array([0.3, 0.5, 0.7])
        deltas = np.array([0.0, 0.0, 0.0])
        self._seed_success_buffer(h, F_vals, CR_vals, deltas)
        h._update_memory()
        # Uniform weights → expected = Σ(F^2)/Σ(F).
        expected = float(np.sum(F_vals * F_vals) / np.sum(F_vals))
        assert h._M_F[0] == pytest.approx(expected, rel=1e-9)


# ----------------------------------------------------------------------
# End-to-end pipeline smoke
# ----------------------------------------------------------------------


class NLSHADELBCPipelineTests(_MockStrategyMixin, PanobbgoTestCase):
    """Smoke tests driving the heuristic through its public callbacks."""

    def test_on_start_emits_NP_init_points(self):
        """``on_start`` queues ``NP_init`` points tagged with the heuristic name."""
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC

        h = NLSHADE_LBC(self.strategy, NP_init=8, seed=0)
        h.on_start()
        emitted = h.get_points(limit=100)
        assert len(emitted) == 8
        assert all(pt.who.startswith("NLSHADE_LBC:") for pt in emitted)

    def test_smoke_quadratic_no_regression(self):
        """A few rounds on f(x)=||x||² makes no negative global progress."""
        from panobbgo.heuristics.lshade import _Dropped
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC
        from panobbgo.lib import Point, Result

        h = NLSHADE_LBC(self.strategy, NP_init=8, NP_min=4, seed=5)
        h.on_start()

        # Local seeded generator: the perturbation noise below is reproducible
        # and does not depend on (or disturb) NumPy's global RNG state.
        rng = np.random.default_rng(5)

        def fx_of(x):
            return float(np.dot(x, x))

        # Answer every initially pending request with a random point.
        items = list(h._pending.items())
        h.get_points(limit=100)
        results = []
        for rid, _m in items:
            x = self.problem.random_point()
            results.append(Result(Point(x, f"NLSHADE_LBC:{rid}"), fx_of(x)))
        h.on_new_results(results)
        h.get_points(limit=100)
        best_before = min(s.fx for s in h._population if isinstance(s, Result))

        for _round in range(20):
            pending = list(h._pending.items())
            if not pending:
                break
            h.get_points(limit=200)
            results = []
            for rid, meta in pending:
                slot = h._population[meta.slot_idx]
                # Skip slots that hold no evaluated member to perturb.
                if isinstance(slot, _Dropped) or slot is None:
                    continue
                noise = 0.1 * rng.standard_normal(self.problem.dim)
                x = self.problem.project(np.asarray(slot.x) + noise)
                results.append(Result(Point(x, f"NLSHADE_LBC:{rid}"), fx_of(x)))
            h.on_new_results(results)

        best_after = min(s.fx for s in h._population if isinstance(s, Result))
        assert best_after <= best_before + 1e-6

    def test_restart_resets_memory_and_archive(self):
        """``on_restart`` clears the archive and its cap and refills ``_pending``."""
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC

        h = NLSHADE_LBC(self.strategy, NP_init=6, seed=2)
        h.on_start()
        h.get_points(limit=100)
        h._rsp_archive_cap = 3
        h._archive.append(np.array([0.5, 0.5]))
        h.on_restart(np.array([0.0, 0.0]), reason="test")
        assert h._rsp_archive_cap is None
        assert h._archive == []
        assert len(h._pending) == h.NP_init


# ----------------------------------------------------------------------
# Byte-identical safety: NL-SHADE-RSP / jSO / L-SHADE behaviour unchanged
# ----------------------------------------------------------------------


class NLSHADELBCInheritanceTests(_MockStrategyMixin, PanobbgoTestCase):
    """The parent class keeps its own memory update."""

    def test_nl_shade_rsp_uses_unchanged_lehmer_mean(self):
        """NL-SHADE-RSP must not pick up the LBC override."""
        from panobbgo.heuristics.nl_shade_rsp import NLSHADE_RSP

        h = NLSHADE_RSP(self.strategy, H=4)
        h.on_start()
        h._success_F = [0.4, 0.6]
        h._success_CR = [0.5, 0.7]
        h._success_delta = [1.0, 1.0]
        h._update_memory()
        # Standard L-SHADE Lehmer mean with p=2, m=1.
        F_vals = np.array([0.4, 0.6])
        expected = float(np.sum(F_vals * F_vals) / np.sum(F_vals))
        assert h._M_F[0] == pytest.approx(expected, rel=1e-9)


# ----------------------------------------------------------------------
# Registration
# ----------------------------------------------------------------------


class NLSHADELBCRegistrationTests(_MockStrategyMixin, PanobbgoTestCase):
    """The heuristic is exported and present in the self-improve catalogs."""

    def test_registered_in_heuristics_package(self):
        """``NLSHADE_LBC`` is an attribute of the package and listed in ``__all__``."""
        import panobbgo.heuristics as h

        assert hasattr(h, "NLSHADE_LBC")
        assert "NLSHADE_LBC" in h.__all__

    def test_in_structural_catalog(self):
        """Some ``add_heuristic`` structural rule lists ``NLSHADE_LBC`` as a candidate."""
        from panobbgo.heuristics.nl_shade_lbc import NLSHADE_LBC
        from panobbgo.self_improve import StructuralMutationRule, default_structural_catalog

        catalog = default_structural_catalog()
        add_rules = [r for r in catalog.rules if isinstance(r, StructuralMutationRule) and r.op == "add_heuristic"]
        assert add_rules
        has_lbc = any(cls is NLSHADE_LBC for rule in add_rules for cls, _ in (rule.candidate_classes or ()))
        assert has_lbc

    def test_kwarg_catalog_has_lbc_dials(self):
        """The default kwarg catalog has rules for ``NP_init`` and every LBC parameter."""
        from panobbgo.self_improve import MutationRule, default_catalog

        rules = default_catalog().rules
        params = {
            (r.class_name, r.param_name) for r in rules if isinstance(r, MutationRule) and r.class_name == "NLSHADE_LBC"
        }
        assert ("NLSHADE_LBC", "NP_init") in params
        assert ("NLSHADE_LBC", "p_F_init") in params
        assert ("NLSHADE_LBC", "p_F_final") in params
        assert ("NLSHADE_LBC", "p_CR_init") in params
        assert ("NLSHADE_LBC", "p_CR_final") in params
        assert ("NLSHADE_LBC", "m_lbc") in params