Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
2143c18
Add prop_leios_late_join failing test
dnadales May 13, 2026
a2ea475
Add hbMayCertifiedEb to HeaderBody
dnadales May 13, 2026
af9ce50
Filter pending CertRBs from ChainSel
dnadales May 13, 2026
ec16b11
Add chain-consistency assertion to prop_leios_late_join
dnadales May 14, 2026
75ef78b
Re-trigger ChainSel when EB closure arrives
dnadales May 14, 2026
525a3ef
Fetch missing CertRB closures on late-joining nodes
dnadales May 14, 2026
dd47734
Cap late-join slot at numSlots/4 in prop_leios_late_join
dnadales May 14, 2026
0ad359d
Sweep pending CertRBs in leiosFetchLogic
dnadales May 15, 2026
ca87a5b
Recheck closure after cdbPendingEBs insert
dnadales May 15, 2026
497122f
Fix Werror warnings in NodeKernel after late-join changes
dnadales May 15, 2026
181cdf6
Document CertRB-candidate peer-fallback invariant
dnadales May 15, 2026
cdc3fad
Bracket LeiosDbConnection in ebCompletionRunner
dnadales May 15, 2026
8311774
Factor pendingEbReconciler discipline into LeiosDemoTypes
dnadales May 15, 2026
20a6571
Cover len-12 HeaderBody CBOR path in generators
dnadales May 15, 2026
bcc9089
Add dedicated trace events for ChainSelReprocessBlock
dnadales May 15, 2026
8414ae3
Key ChainSelReprocessBlock removal by LeiosPoint
dnadales May 15, 2026
b64f327
Strip plan-step references from prop_leios_late_join comments
dnadales May 15, 2026
3438542
Drop CDB{..} wildcards from late-join code paths
dnadales May 15, 2026
8fc0444
Document candidateCertEbs construction in leiosFetchLogic
dnadales May 15, 2026
1d339da
Tolerate empty Shelley credentials in voting-key derivation
dnadales May 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,9 @@ import Ouroboros.Consensus.Protocol.Praos (Praos, PraosCrypto)
import Ouroboros.Consensus.Protocol.TPraos (TPraos)
import Ouroboros.Consensus.Shelley.Eras
import Ouroboros.Consensus.Shelley.Ledger (ShelleyBlock, ShelleyCompatible)
import Ouroboros.Consensus.Storage.LedgerDB (ResolveLeiosBlock (resolveLeiosBlock))
import Ouroboros.Consensus.Storage.LedgerDB
( ResolveLeiosBlock (certifiedEbFromHeader, resolveLeiosBlock)
)
import Ouroboros.Consensus.TypeFamilyWrappers

{-------------------------------------------------------------------------------
Expand Down Expand Up @@ -1403,3 +1405,7 @@ instance
injectConwayBlock
<$> resolveLeiosBlock db conwayHdrSt conwayBlk
resolveLeiosBlock _ _ blk = return blk

certifiedEbFromHeader (HeaderConway conwayHdr) =
certifiedEbFromHeader conwayHdr
certifiedEbFromHeader _ = Nothing
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ import Control.Exception (assert)
import qualified Data.ByteString.Short as Short
import Data.Functor.These (These1 (..))
import qualified Data.Map.Strict as Map
import Data.Maybe (listToMaybe)
import Data.SOP.BasicFunctors
import Data.SOP.Counting
import Data.SOP.Functors (Flip (..))
Expand Down Expand Up @@ -814,11 +815,12 @@ protocolInfoCardano paramsCardano
(Shelley.ShelleyStorageConfig tpraosSlotsPerKESPeriod k)
, topLevelConfigCheckpoints = cardanoCheckpoints
, -- FIXME: REMOVE THIS. Accesses and re-uses KES signing key material.
-- For nodes without Shelley-based leader credentials (e.g. relays),
-- there is no key material to re-use, so the voting key is Nothing.
topLevelConfigVotingKey =
Just
. rawSerialiseUnsoundPureSignKeyKES
rawSerialiseUnsoundPureSignKeyKES
. shelleyLeaderCredentialsInitSignKey
$ credssShelleyBased !! 0
<$> listToMaybe credssShelleyBased
}

-- When the initial ledger state is not in the Byron era, register various
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ forgeShelleyBlock hotKey cbl mayLeiosInfo ForgeBlockArgs{..} = do
(SL.bodyBytesSize protocolVersion body)
protocolVersion
(snd <$> mayForgedEbAnn)
Nothing

let blk =
mkShelleyBlock $
Expand All @@ -116,6 +117,7 @@ forgeShelleyBlock hotKey cbl mayLeiosInfo ForgeBlockArgs{..} = do
(SL.bodyBytesSize protocolVersion body)
protocolVersion
Nothing -- FIXME(bladyjoker): Skip announcement when certifying https://github.com/input-output-hk/ouroboros-leios/issues/838
(Just $ Leios.leiosCertificateEbPoint cert)
let blk =
mkShelleyBlock $
SL.Block
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,10 @@ import Ouroboros.Consensus.Protocol.Praos
, PraosState (praosStateLastSlot, praosStateLeios)
, withOriginToSlotNo
)
import Ouroboros.Consensus.Protocol.Praos.Header
( Header (headerBody)
, HeaderBody (hbMayCertifiedEb)
)
import Ouroboros.Consensus.Protocol.TPraos (TPraos)
import Ouroboros.Consensus.Shelley.Ledger.Block
import Ouroboros.Consensus.Shelley.Ledger.Config
Expand All @@ -158,6 +162,7 @@ import Ouroboros.Consensus.Shelley.Protocol.Abstract
, envelopeChecks
, mkHeaderView
)
import Ouroboros.Consensus.Shelley.Protocol.Praos ()
import Ouroboros.Consensus.Storage.LedgerDB
import Ouroboros.Consensus.Util.CBOR
( decodeWithOrigin
Expand Down Expand Up @@ -372,6 +377,9 @@ instance
}
resolveLeiosBlock _leiosDb _hdrSt blk = return blk

certifiedEbFromHeader (ShelleyHeader rawHdr _) =
hbMayCertifiedEb (headerBody rawHdr)

deserialiseShelleyTx :: forall era. ShelleyBasedEra era => BS.ByteString -> Core.Tx era
deserialiseShelleyTx bs = case CB.decodeFullAnnotator (Core.eraProtVerLow @era) "Leios Tx" CB.decCBOR (BL.fromStrict bs) of
Left err -> error $ "Failed to deserialise Tx era: " <> show err
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ import Data.Kind (Type)
import Data.Typeable (Typeable)
import Data.Word (Word64)
import GHC.Generics (Generic)
import LeiosDemoTypes (EbAnnouncement)
import LeiosDemoTypes (EbAnnouncement, LeiosPoint)
import NoThunks.Class (NoThunks)
import Numeric.Natural (Natural)
import Ouroboros.Consensus.Protocol.Abstract
Expand Down Expand Up @@ -165,6 +165,8 @@ class ProtocolHeaderSupportsKES proto where
ProtVer ->
-- | Leios EB announcement
Maybe EbAnnouncement ->
-- | Leios certified EB
Maybe LeiosPoint ->
m (ShelleyProtocolHeader proto)

-- | ProtocolHeaderSupportsProtocol` provides support for the concrete
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ instance PraosCrypto c => ProtocolHeaderSupportsKES (Praos c) where
currentKesPeriod - startOfKesPeriod
| otherwise =
0
mkHeader hk cbl il slotNo blockNo prevHash bodyHash bodySize protVer mayEbAnnouncement = do
mkHeader hk cbl il slotNo blockNo prevHash bodyHash bodySize protVer mayEbAnnouncement mayCertifiedEb = do
PraosFields{praosSignature, praosToSign} <- forgePraosFields hk cbl il mkBhBodyBytes
pure $ Header praosToSign praosSignature
where
Expand All @@ -167,6 +167,7 @@ instance PraosCrypto c => ProtocolHeaderSupportsKES (Praos c) where
, hbOCert = praosToSignOCert
, hbProtVer = protVer
, hbMayEbAnnouncement = mayEbAnnouncement
, hbMayCertifiedEb = mayCertifiedEb
}

instance PraosCrypto c => ProtocolHeaderSupportsProtocol (Praos c) where
Expand All @@ -182,6 +183,7 @@ instance PraosCrypto c => ProtocolHeaderSupportsProtocol (Praos c) where
, hvSigned = headerBody
, hvSignature = headerSig
, hvMayEbAnnouncement = hbMayEbAnnouncement headerBody
, hvMayCertifiedEb = hbMayCertifiedEb headerBody
}
pHeaderIssuer = hbVk . headerBody
pHeaderIssueNo = SL.ocertN . hbOCert . headerBody
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ instance PraosCrypto c => ProtocolHeaderSupportsKES (TPraos c) where
currentKesPeriod - startOfKesPeriod
| otherwise =
0
mkHeader hotKey canBeLeader isLeader curSlot curNo prevHash bbHash actualBodySize protVer _mayEbAnnouncement = do
mkHeader hotKey canBeLeader isLeader curSlot curNo prevHash bbHash actualBodySize protVer _mayEbAnnouncement _mayCertifiedEb = do
TPraosFields{tpraosSignature, tpraosToSign} <-
forgeTPraosFields hotKey canBeLeader isLeader mkBhBody
pure $ SL.BHeader tpraosToSign tpraosSignature
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ fromShelleyLedgerExamplesPraos
, hbOCert = SL.bheaderOCert bhBody
, hbProtVer = SL.bprotver bhBody
, hbMayEbAnnouncement = Nothing
, hbMayCertifiedEb = Nothing
}
hSig = coerce bhSig
hash = ShelleyHash $ SL.unHashHeader sleHashHeader
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ instance
, Praos.hbOCert = SL.bheaderOCert bhBody
, Praos.hbProtVer = SL.bprotver bhBody
, Praos.hbMayEbAnnouncement = Nothing
, Praos.hbMayCertifiedEb = Nothing
}
hSig = coerce bhSig

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,10 @@ import Test.Consensus.Cardano.ProtocolInfo (Era (Conway), hardForkInto)
import Test.QuickCheck
( Property
, Testable
, choose
, conjoin
, counterexample
, forAll
, tabulate
, (.&&.)
, (.||.)
Expand Down Expand Up @@ -137,7 +139,7 @@ import Test.ThreadNet.Network
, TraceThreadNetNode (FromLeios, FromLeiosPeer, FromMempool)
)
import Test.ThreadNet.TxGen.Cardano (CardanoTxGenExtra (..))
import Test.ThreadNet.Util.NodeJoinPlan (trivialNodeJoinPlan)
import Test.ThreadNet.Util.NodeJoinPlan (NodeJoinPlan (..), trivialNodeJoinPlan)
import Test.ThreadNet.Util.NodeRestarts (noRestarts)
import Test.ThreadNet.Util.NodeToNodeVersion (newestVersion)
import Test.ThreadNet.Util.NodeTopology (meshNodeTopology)
Expand All @@ -152,6 +154,8 @@ tests =
"Leios ThreadNet"
[ adjustQuickCheckTests (`div` 10) $
testProperty "basic functionality" prop_leios
, adjustQuickCheckTests (`div` 10) $
testProperty "late join" prop_leios_late_join
]

-- | Verify a suite of basic Leios ThreadNet invariants in a single run:
Expand Down Expand Up @@ -183,7 +187,9 @@ prop_leios seed =
numSlots = 200 :: Word64

(testOutput, ProtocolInfo{pInfoConfig, pInfoInitLedger}) =
runThreadNet seed (NumSlots numSlots) (NumCoreNodes $ fromIntegral numNodes)
runThreadNet seed (NumSlots numSlots) numCoreNodes (trivialNodeJoinPlan numCoreNodes)

numCoreNodes = NumCoreNodes $ fromIntegral numNodes

traces = testOutput.allTraces

Expand Down Expand Up @@ -334,6 +340,53 @@ prop_leios seed =
| (s1, s2) <- zip certifyingBlocks (drop 1 certifyingBlocks)
]

-- | Late-joining node must not crash.
--
-- 4 nodes, 200 slots. Nodes 0–2 join at slot 0; node 3 joins at a random
-- slot. On the current codebase this crashes in 'resolveLeiosBlock' because
-- the late node receives a CertRB referencing an EB it never saw.
prop_leios_late_join :: Seed -> Property
prop_leios_late_join seed =
-- Cap the join slot at numSlots/4 so the late node always has at least 3/4
-- of the run to catch up. Otherwise samples near numSlots would fail the
-- chain-consistency assertion for catch-up-bandwidth reasons unrelated to
-- the late-join logic under test.
forAll (choose (1, fromIntegral numSlots `div` 4)) $ \lateJoinSlot ->
let
joinPlan =
NodeJoinPlan $
Map.fromList
[ (CoreNodeId 0, SlotNo 0)
, (CoreNodeId 1, SlotNo 0)
, (CoreNodeId 2, SlotNo 0)
, (CoreNodeId 3, SlotNo $ fromIntegral (lateJoinSlot :: Int))
]

numCoreNodes = NumCoreNodes 4

(testOutput, _) =
runThreadNet seed (NumSlots numSlots) numCoreNodes joinPlan

nodeChains =
Chain.toOldestFirst . nodeOutputFinalChain <$> testOutput.testOutputNodes
in
-- The simulation must not throw: ChainSel skips a CertRB whose EB
-- closure is missing instead of crashing in resolveLeiosBlock.
conjoin
[ not (null nodeChains)
& counterexample "test output was empty"
, -- All nodes should converge to the same chain. Without the
-- ChainSel re-trigger on EB closure arrival ('ebCompletionRunner'),
-- the late node's chain stays shorter because the filtered
-- CertRBs are never reconsidered.
all (== head (Map.elems nodeChains)) nodeChains
& counterexample "nodes have different chains"
& counterexample ("chain lengths: " <> show (fmap length nodeChains))
]
& counterexample ("late join slot: " <> show lateJoinSlot)
where
numSlots = 200 :: Word64

-- | Independently compute cumulative tx bytes by resolving each block in the
-- chain (filling in EB closures from the LeiosDB) and summing individual
-- 'sizeTxF' values per transaction.
Expand Down Expand Up @@ -420,8 +473,9 @@ runThreadNet ::
Seed ->
NumSlots ->
NumCoreNodes ->
NodeJoinPlan ->
(TestOutput (CardanoBlock StandardCrypto), ProtocolInfo (CardanoBlock StandardCrypto))
runThreadNet initSeed numSlots numCoreNodes =
runThreadNet initSeed numSlots numCoreNodes joinPlan =
( runTestNetwork
testConfig
testConfigB
Expand Down Expand Up @@ -503,7 +557,7 @@ runThreadNet initSeed numSlots numCoreNodes =
{ forgeEbbEnv = Nothing
, future = EraFinal slotLength shelleyGenesis.sgEpochLength
, messageDelay = noCalcMessageDelay
, nodeJoinPlan = trivialNodeJoinPlan numCoreNodes
, nodeJoinPlan = joinPlan
, nodeRestarts = noRestarts
, txGenExtra =
CardanoTxGenExtra
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ import Control.Concurrent.Class.MonadSTM.Strict (readTChan)
import qualified Data.ByteString as BS
import Data.Map (Map)
import qualified Data.Map as Map
import qualified Data.Set as Set
import LeiosDemoDb
( LeiosDbConnection
, LeiosDbHandle (..)
Expand All @@ -179,6 +180,7 @@ import LeiosDemoTypes
( ForgedLeiosEb
, LeiosOutstanding
, LeiosPeerVars
, LeiosPoint (..)
, LeiosVote (..)
, TraceLeiosKernel (..)
, VoterId (..)
Expand Down Expand Up @@ -419,6 +421,25 @@ initNodeKernel
getLeiosOutstanding <- MVar.newMVar Leios.emptyLeiosOutstanding -- TODO init from DB
getLeiosReady <- MVar.newEmptyMVar

-- Mirror cdbPendingEBs into Leios missingEbBodies with size 0 so that the
-- LeiosFetch client tries to fetch closures for CertRBs that ChainSel is
-- holding back.
lastAppliedPendingEBs <- newTVarIO Map.empty
void $
forkLinkedThread registry "NodeKernel.pendingEbReconciler" $
forever $ do
(added, removed) <- atomically $ do
new <- ChainDB.getPendingCertRBs chainDB
old <- readTVar lastAppliedPendingEBs
when (new == old) retry
writeTVar lastAppliedPendingEBs new
pure (Map.difference new old, Map.difference old new)
MVar.modifyMVar_ getLeiosOutstanding $
pure
. Leios.applyPendingRemoved (Map.keys removed)
. Leios.applyPendingAdded (Map.keys added)
void $ MVar.tryPutMVar getLeiosReady ()

void $
forkLinkedThread registry "NodeKernel.leiosFetchLogic" $ do
leiosConn <- allocate_ registry (LeiosDb.open leiosDB) LeiosDb.close
Expand All @@ -427,8 +448,65 @@ initNodeKernel
traceWith tracer $ MkTraceLeiosKernel "leiosFetchLogic: wait for leios ready"
() <- MVar.takeMVar getLeiosReady
iterationStart <- getMonotonicTime
-- Sweep cdbPendingEBs for CertRBs whose EB closure has since
-- become available locally, and re-enqueue them for ChainSel.
-- ebCompletionRunner already does this when a notification fires,
-- but it can miss the window if the closure completes between
-- ChainSel's "is the closure present?" check and its insert into
-- cdbPendingEBs. The sweep covers that race and any other
-- missed-notification scenarios (e.g. subscription startup gap).
--
-- An inline recheck in ChainSel.hs (the add-block path that
-- inserts into cdbPendingEBs) closes the immediate race window
-- so callers don't have to wait for the next sweep tick; this
-- sweep is the load-bearing fix and also handles cases the
-- inline recheck can't see (e.g. closure arrival just after a
-- prior pending-insert).
pending <- atomically $ ChainDB.getPendingCertRBs chainDB
forM_ (Map.toList pending) $ \(leiosPoint, certRBHash) -> do
mayComplete <- LeiosDb.leiosDbQueryCompletedEbByPoint leiosConn leiosPoint
when (isJust mayComplete) $
void $ ChainDB.addReprocessBlock chainDB leiosPoint certRBHash
leiosPeersVars <- MVar.readMVar getLeiosPeersVars
offerings <- mapM (MVar.readMVar . Leios.offerings) leiosPeersVars
-- Per-peer set of EB hashes the peer has certified on its
-- selected chain. Used as a fallback peer source in
-- 'choosePeerEb' / 'choosePeerTx' when no peer has actively
-- offered the EB body or closure.
--
-- Load-bearing invariant: if a peer's candidate fragment
-- contains a CertRB, that peer has the EB closure. This rests
-- on the upstream's ChainSync server only emitting
-- selected-chain headers, the upstream's ChainSel filtering
-- CertRBs with missing closures (cdbPendingEBs), and LeiosDB
-- not GC'ing closures. See the fallback branches in
-- 'choosePeerEb' / 'choosePeerTx' in 'LeiosDemoLogic'.
let
-- For one peer, scan its candidate fragment and collect the
-- hashes of every EB that any header on the candidate
-- certifies. Only the EB hash matters for fetch-decision
-- keying; the slot is discarded.
certifiedEbsFromCandidate csHandle = do
state <- readTVar (cschState csHandle)
pure $
Set.fromList
-- Singleton-list generator with a refutable pattern:
-- each header that does not certify an EB is silently
-- skipped.
[ pointEbHash
| hwt <- AF.toOldestFirst (csCandidate state)
, Just MkLeiosPoint{pointEbHash} <-
[LedgerDB.certifiedEbFromHeader (hwtHeader hwt)]
]
candidateCertEbs <- atomically $ do
-- Snapshot of all live ChainSync clients (one handle per
-- upstream peer).
handles <- cschcMap varChainSyncHandles
-- Re-key from 'ConnectionId' to Leios's 'PeerId' newtype.
-- 'mapKeysMonotonic' is safe because 'MkPeerId' is just an
-- order-preserving wrapper.
fmap (Map.mapKeysMonotonic Leios.MkPeerId) $
forM handles certifiedEbsFromCandidate
newDecisions <- MVar.modifyMVar getLeiosOutstanding $ \outstanding -> do
-- Filter outstanding work against DB before running fetch iteration.
-- This removes EBs and TXs we already have (e.g., from forging or other peers).
Expand All @@ -438,6 +516,7 @@ initNodeKernel
Leios.leiosFetchLogicIteration
Leios.demoLeiosFetchStaticEnv
offerings
candidateCertEbs
filteredOutstanding
pure (outstanding', newDecisions)
traceWith tracer $ MkTraceLeiosKernel $ "leiosFetchLogic: decided"
Expand Down
Loading
Loading