-
Notifications
You must be signed in to change notification settings - Fork 84
fix(mesh): wire EpochMaxWins into CRDT merge #1469
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8ee1e75
cd81842
4baebd9
9b20166
d82a302
0b77522
3435ec5
beace98
c2092a7
74bc1a6
c17cf0f
936024b
a95df13
2af891a
269c066
9e26e0a
f6085e9
a5d017f
83cfe17
5071254
5ab5ba5
f9b2136
d4310b0
73c665b
cf38daf
6274a61
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| /// Merge strategy for CRDT namespaces. Determines how conflicts are resolved | ||
| /// when two nodes write the same key concurrently. | ||
| #[derive(Debug, Clone, Copy, PartialEq, Eq)] | ||
| pub enum MergeStrategy { | ||
| /// Higher (version, replica_id) wins. Used for worker:*, policy:*, config:*. | ||
| LastWriterWins, | ||
| /// Compare epochs first, then max within same epoch. | ||
| /// | ||
| /// The raw write payload at the put boundary MUST be exactly 16 bytes: | ||
| /// epoch (u64 big-endian) followed by count (i64 big-endian). The CRDT | ||
| /// normalizes stored and replicated values into a richer | ||
| /// `RateLimitShard` form internally (live-point frontier plus an | ||
| /// embedded tombstone boundary), so values observed by `get`, gossip | ||
| /// peers, and subscribers after normalization are larger than 16 | ||
| /// bytes. | ||
| EpochMaxWins, | ||
| } | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,8 +1,8 @@ | ||
| use std::collections::{HashMap, HashSet}; | ||
| use std::collections::HashMap; | ||
|
|
||
| use serde::{Deserialize, Serialize}; | ||
|
|
||
| use super::replica::ReplicaId; | ||
| use super::{epoch_max_wins, merge_strategy::MergeStrategy, replica::ReplicaId}; | ||
|
|
||
| // ============================================================================ | ||
| // Operation Type Definition - Atomic Unit of State Change | ||
|
|
@@ -112,9 +112,16 @@ impl OperationLog { | |
| /// new appends, not on every append. If compaction doesn't reduce below | ||
| /// threshold (very high key cardinality), the oldest entries are truncated. | ||
| pub fn append(&mut self, operation: Operation) { | ||
| self.append_with_strategy(operation, |_| MergeStrategy::LastWriterWins); | ||
| } | ||
|
|
||
| pub(super) fn append_with_strategy<F>(&mut self, operation: Operation, strategy_for_key: F) | ||
| where | ||
| F: Fn(&str) -> MergeStrategy, | ||
| { | ||
| self.operations.push(operation); | ||
| if self.operations.len() > Self::AUTO_COMPACT_THRESHOLD { | ||
| self.compact(); | ||
| self.compact_with_strategy(strategy_for_key); | ||
| // If still over threshold after dedup (extremely high key cardinality | ||
| // >10K unique keys), truncate oldest entries. This drops state for the | ||
| // oldest keys, which will be re-synced from peers on the next merge. | ||
|
|
@@ -159,34 +166,59 @@ impl OperationLog { | |
| self.operations.is_empty() | ||
| } | ||
|
|
||
| fn latest_operations_by_key(&self) -> HashMap<String, Operation> { | ||
| let mut latest_by_key: HashMap<String, Operation> = HashMap::new(); | ||
| fn latest_lww_operation<'a, I>(operations: I) -> Option<&'a Operation> | ||
| where | ||
| I: IntoIterator<Item = &'a Operation>, | ||
| { | ||
| operations | ||
| .into_iter() | ||
| .max_by_key(|operation| (operation.timestamp(), operation.replica_id())) | ||
| } | ||
|
|
||
| fn latest_epoch_max_wins_operation<'a>( | ||
| operations: impl IntoIterator<Item = &'a Operation>, | ||
| ) -> Option<Operation> { | ||
| epoch_max_wins::compact_operations(operations) | ||
| } | ||
|
|
||
| fn latest_operations_by_key_with_strategy<F>( | ||
| &self, | ||
| strategy_for_key: F, | ||
| ) -> HashMap<String, Operation> | ||
| where | ||
| F: Fn(&str) -> MergeStrategy, | ||
| { | ||
| let mut operations_by_key: HashMap<String, Vec<&Operation>> = HashMap::new(); | ||
|
|
||
| for operation in &self.operations { | ||
| let key = operation.key().to_string(); | ||
| match latest_by_key.get(&key) { | ||
| Some(current) | ||
| if (current.timestamp(), current.replica_id()) | ||
| >= (operation.timestamp(), operation.replica_id()) => {} | ||
| _ => { | ||
| latest_by_key.insert(key, operation.clone()); | ||
| } | ||
| } | ||
| operations_by_key | ||
| .entry(operation.key().to_string()) | ||
| .or_default() | ||
| .push(operation); | ||
| } | ||
|
|
||
| latest_by_key | ||
| operations_by_key | ||
| .into_iter() | ||
| .filter_map(|(key, operations)| { | ||
| let latest = match strategy_for_key(&key) { | ||
| MergeStrategy::LastWriterWins => { | ||
| Self::latest_lww_operation(operations).cloned() | ||
| } | ||
| MergeStrategy::EpochMaxWins => { | ||
| Self::latest_epoch_max_wins_operation(operations) | ||
| } | ||
| }?; | ||
| Some((key, latest)) | ||
| }) | ||
| .collect() | ||
| } | ||
|
|
||
| /// Keep only latest operation per key to bound log growth. | ||
| /// | ||
| /// This uses `latest_operations_by_key` LWW tie-breaking by `(timestamp, ReplicaId)`. | ||
| /// As a result, concurrent operations may be compacted away deterministically, so | ||
| /// `compact()` + `merge()` can be non-idempotent in raw log contents even though | ||
| /// `apply_operation` and `operation_id` guards keep state semantics safe. | ||
| /// Stronger concurrency retention would require vector-clock/version-vector metadata. | ||
| pub fn compact(&mut self) { | ||
| pub(super) fn compact_with_strategy<F>(&mut self, strategy_for_key: F) | ||
| where | ||
| F: Fn(&str) -> MergeStrategy, | ||
| { | ||
| self.operations = self | ||
| .latest_operations_by_key() | ||
| .latest_operations_by_key_with_strategy(strategy_for_key) | ||
| .into_values() | ||
| .collect::<Vec<_>>(); | ||
| self.operations | ||
|
|
@@ -199,9 +231,12 @@ impl OperationLog { | |
| .retain(|operation| operation.timestamp() > watermark); | ||
| } | ||
|
|
||
| /// Build a latest-state snapshot and clear the operation log. | ||
| pub fn snapshot_and_truncate(&mut self) -> HashMap<String, Operation> { | ||
| let snapshot = self.latest_operations_by_key(); | ||
| /// Build a latest-state snapshot with the configured merge strategy and clear the operation log. | ||
| pub fn snapshot_and_truncate<F>(&mut self, strategy_for_key: F) -> HashMap<String, Operation> | ||
| where | ||
| F: Fn(&str) -> MergeStrategy, | ||
| { | ||
| let snapshot = self.latest_operations_by_key_with_strategy(strategy_for_key); | ||
| self.operations.clear(); | ||
| snapshot | ||
| } | ||
|
|
@@ -233,20 +268,43 @@ impl OperationLog { | |
| } | ||
| } | ||
|
|
||
| /// Merge another operation log. | ||
| /// | ||
| /// INVARIANT: `Operation::operation_id()` (`ReplicaId`, `timestamp`) is unique per operation | ||
| /// because each replica's `LamportClock::tick()` is monotonic and never repeats a timestamp. | ||
| pub fn merge(&mut self, other: &OperationLog) { | ||
| let mut seen_ids: HashSet<(ReplicaId, u64)> = self | ||
| /// Per-key strategy-aware merge. For `EpochMaxWins` keys, an incoming | ||
| /// operation that collides on `(replica_id, timestamp)` with an existing | ||
| /// local op is folded via `epoch_max_wins::compact_operations` so a | ||
| /// compacted payload (carrying an embedded tombstone_version or richer | ||
| /// frontier) replaces the older raw payload at the same op id. LWW keys | ||
| /// dedup by op id. | ||
| pub(super) fn merge_with_strategy<F>(&mut self, other: &OperationLog, strategy_for_key: F) | ||
| where | ||
| F: Fn(&str) -> MergeStrategy, | ||
| { | ||
| let mut local_index: HashMap<(ReplicaId, u64), usize> = self | ||
| .operations | ||
| .iter() | ||
| .map(Operation::operation_id) | ||
| .enumerate() | ||
| .map(|(idx, op)| (op.operation_id(), idx)) | ||
| .collect(); | ||
|
|
||
| for operation in &other.operations { | ||
| if seen_ids.insert(operation.operation_id()) { | ||
| self.operations.push(operation.clone()); | ||
| let op_id = operation.operation_id(); | ||
| match local_index.get(&op_id).copied() { | ||
| None => { | ||
| local_index.insert(op_id, self.operations.len()); | ||
| self.operations.push(operation.clone()); | ||
| } | ||
| Some(local_idx) => { | ||
| if matches!( | ||
| strategy_for_key(operation.key()), | ||
| MergeStrategy::EpochMaxWins | ||
| ) { | ||
| let local_op = self.operations[local_idx].clone(); | ||
| if let Some(folded) = | ||
| epoch_max_wins::compact_operations([&local_op, operation]) | ||
| { | ||
| self.operations[local_idx] = folded; | ||
| } | ||
|
Comment on lines
+281
to
+305
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Include
🛠️ Localized fix- let mut local_index: HashMap<(ReplicaId, u64), usize> = self
+ let mut local_index: HashMap<(String, ReplicaId, u64), usize> = self
.operations
.iter()
.enumerate()
- .map(|(idx, op)| (op.operation_id(), idx))
+ .map(|(idx, op)| {
+ (
+ (op.key().to_owned(), op.replica_id(), op.timestamp()),
+ idx,
+ )
+ })
.collect();
for operation in &other.operations {
- let op_id = operation.operation_id();
+ let op_id = (
+ operation.key().to_owned(),
+ operation.replica_id(),
+ operation.timestamp(),
+ );
match local_index.get(&op_id).copied() {Based on learnings, use 🤖 Prompt for AI Agents
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Declining for this PR. The repro requires one replica producing two operations at the same There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
If you ever want to make the assumption explicit, a one-liner 🐇 (a little deflated, but fair enough) ✏️ Learnings added
🧠 Learnings used |
||
| } | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.