VollcomDigital · AlexanderPietsch · Apr 15, 2026 · Apr 15, 2026 · Apr 20, 2026 · Apr 20, 2026
diff --git a/README.md b/README.md
@@ -254,6 +254,22 @@ See new collection examples under `config/collections/` for FX intraday via Finn
   - collection-level overrides are supported via `collections[].validation.optimization`
     and are resolved against global `validation.optimization` during config loading.
 - `validation.result_consistency` controls strategy-result concentration checks:
+  - `data_integrity_audit` (optional thresholds module; gate is active when `collections[].reference_source` is set):
+    - purpose: compare canonicalized bars from the primary `source` and a secondary `reference_source`
+      to catch bad prints / ghost bars before accepting strategy results
+    - source routing:
+      - primary fetch uses `collections[].source` (+ `collections[].exchange` for ccxt)
+      - reference fetch uses `collections[].reference_source`
+      - for ccxt-vs-ccxt venue comparisons, set:
+        - `reference_source: ccxt`
+        - `reference_exchange: <venue>`
+      - when `reference_source` is ccxt and `reference_exchange` is unset, the runner falls back to
+        `collections[].exchange`
+    - `min_overlap_ratio` (optional, default `0.99`, `0..1`): minimum fraction of primary-source bars that must have matching reference-source timestamps (`overlap_bars / primary_bars`)
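+    - `max_median_ohlc_diff_bps` (optional, default `5.0`, `>=0`): maximum allowed median OHLC drift between primary and reference bars, in basis points (bps)
+    - `max_p95_ohlc_diff_bps` (optional, default `20.0`, `>=0`): maximum allowed 95th-percentile (p95) OHLC drift between primary and reference bars, in basis points (bps)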
+    - action: always `reject_result` (not configurable) when an overlap or drift threshold is breached, or when the comparison is indeterminate
+    - diagnostics are attached under `post_run_meta.data_integrity_audit`
   - `outlier_dependency` (optional module; active when configured):
     - `slices` (required, `>=2`): number of equal time-slices used for diagnostics
     - `profit_share_threshold` (required, `0..1`)
@@ -297,7 +313,7 @@ Structured logs reflect this directly via gate actions:
 - `data_validation_gate` can emit `skip_optimization` (job-level optimization disable).
 - `strategy_optimization_gate` can emit `baseline_only` (strategy-level baseline fallback) or `skip_job`.
 - `strategy_validation_gate` can emit `reject_result` for outlier dependency,
-  execution price variance, and lookahead shuffle testing.
+  execution price variance, lookahead shuffle testing, data integrity audit, and transaction-cost robustness.
 
 Numeric config parsing follows `src/config.py` coercion helpers:
 - numeric fields are strict types: use YAML numbers, not quoted numeric strings

diff --git a/config/example.yaml b/config/example.yaml
@@ -54,6 +54,10 @@ validation:
   result_consistency:
     min_metric: 0.5  # fail fast: require at least this metric before expensive checks
     min_trades: 20  # fail fast: require at least this many closed trades
+    data_integrity_audit:
+      min_overlap_ratio: 0.99  # min fraction of primary bars covered by reference timestamps (overlap_bars / primary_bars)
+      max_median_ohlc_diff_bps: 5.0  # median OHLC drift tolerance (bps)
+      max_p95_ohlc_diff_bps: 20.0  # tail OHLC drift tolerance (bps)
     outlier_dependency:
       slices: 5  # split trade history into N equal time-slices for diagnostics
       profit_share_threshold: 0.80
@@ -80,6 +84,7 @@ collections:
   # Stocks (large-cap growth)
   - name: stocks_large_cap_growth
     source: yfinance
+    reference_source: twelvedata  # optional secondary ("golden") source used by the post-run data_integrity_audit
     symbols: ["CNDX.L", "AAPL", "MSFT", "NVDA"]
     fees: 0.0005  # approx IBKR
     slippage: 0.0005
@@ -100,8 +105,13 @@ collections:
 
   # Crypto (Binance via ccxt)
   - name: crypto
-    source: binance
+    source: ccxt
+    # For ccxt collections, `exchange` selects the primary venue adapter.
+    # Set `reference_source: ccxt` + `reference_exchange` to compare venues
+    # in data_integrity_audit (for example Binance vs Bybit).
+    reference_source: ccxt
     exchange: binance
+    reference_exchange: bybit
     quote: USDT
     symbols: ["BTC/USDT", "ETH/USDT", "BNB/USDT", "SOL/USDT"]
     fees: 0.0006  # approx Bybit/Binance taker