From d381439b41480e94583eb3c6385d9863f36295a9 Mon Sep 17 00:00:00 2001 From: lesnik2u Date: Wed, 18 Feb 2026 16:25:58 +0100 Subject: [PATCH 1/9] CNDB-15669: Implementation of the MAY_HAVE_CONTENT_BIT optimization flag in trie cursors. This metadata flag allows skipping content lookups for structural nodes and is correctly propagated and unioned across all cursor implementations. --- .../db/tries/CollectionMergeCursor.java | 47 ++- .../org/apache/cassandra/db/tries/Cursor.java | 54 ++- .../db/tries/DeletionAwareCursor.java | 14 +- .../db/tries/DeletionAwareMergeSource.java | 2 +- .../db/tries/DepthAdjustedCursor.java | 327 +++++++++--------- .../db/tries/FlexibleMergeCursor.java | 9 +- .../cassandra/db/tries/InMemoryRangeTrie.java | 8 +- .../cassandra/db/tries/InMemoryReadTrie.java | 15 +- .../db/tries/IntersectionCursor.java | 9 + .../cassandra/db/tries/MergeCursor.java | 9 +- .../cassandra/db/tries/PrefixedCursor.java | 9 +- .../cassandra/db/tries/RangeApplyCursor.java | 2 +- .../db/tries/RangeIntersectionCursor.java | 15 +- .../cassandra/db/tries/RangesCursor.java | 12 +- .../cassandra/db/tries/SingletonCursor.java | 6 +- .../db/tries/SingletonOrderedCursor.java | 2 +- .../cassandra/db/tries/TrieSetCursor.java | 29 +- .../db/tries/VerificationCursor.java | 32 +- .../cassandra/db/tries/RangesTrieSetTest.java | 26 +- .../apache/cassandra/db/tries/TrieUtil.java | 44 ++- 20 files changed, 422 insertions(+), 249 deletions(-) diff --git a/src/java/org/apache/cassandra/db/tries/CollectionMergeCursor.java b/src/java/org/apache/cassandra/db/tries/CollectionMergeCursor.java index 3f9632ee4d1b..6c2b88a5353d 100644 --- a/src/java/org/apache/cassandra/db/tries/CollectionMergeCursor.java +++ b/src/java/org/apache/cassandra/db/tries/CollectionMergeCursor.java @@ -95,6 +95,9 @@ abstract class CollectionMergeCursor> implements Cursor CollectionMergeCursor(Trie.CollectionMergeResolver resolver, Direction direction, Collection inputs, IntFunction cursorArrayConstructor, BiFunction extractor) { this.resolver = resolver; @@ -115,6 +118,7 @@ CollectionMergeCursor(Trie.CollectionMergeResolver resolver, Direction di } // The cursors are all currently positioned on the root and thus in valid heap order. + assert Arrays.stream(heap).allMatch(x -> equalCursor(head, x)); } /// Interface for internal operations that can be applied to selected top elements of the heap. @@ -241,14 +245,22 @@ private void heapifyDown(C item, int index) private long maybeSwapHead(long headPosition) { long heap0Position = heap[0].encodedPosition(); - if (Cursor.compare(headPosition, heap0Position) <= 0) + long cmp = Cursor.compare(headPosition, heap0Position); + if (cmp < 0) + { + currentPosition = headPosition; + positionCollected = true; return headPosition; // head is still smallest + } - // otherwise we need to swap heap and heap[0] - C newHeap0 = head; - head = heap[0]; - heapifyDown(newHeap0, 0); - return heap0Position; + if (cmp > 0) + { + // otherwise we need to swap heap and heap[0] + C newHeap0 = head; + head = heap[0]; + heapifyDown(newHeap0, 0); + } + return encodedPosition(); } boolean branchHasMultipleSources() @@ -265,6 +277,7 @@ boolean isExhausted() public long advance() { contentCollected = false; + positionCollected = false; return doAdvance(); } @@ -278,6 +291,7 @@ private long doAdvance() public long advanceMultiple(TransitionsReceiver receiver) { contentCollected = false; + positionCollected = false; // If the current position is present in just one cursor, we can safely descend multiple levels within // its branch as no one of the other tries has content for it. if (branchHasMultipleSources()) @@ -317,6 +331,7 @@ public void apply(C cursor) } contentCollected = false; + positionCollected = false; applyToSelectedElementsInHeap(new SkipTo(), 0); return maybeSwapHead(head.skipTo(encodedSkipPosition)); } @@ -324,7 +339,25 @@ public void apply(C cursor) @Override public long encodedPosition() { - return head.encodedPosition(); + if (!positionCollected) + { + long pos = head.encodedPosition(); + if (Cursor.isExhausted(pos) || !branchHasMultipleSources()) + currentPosition = pos; + else + { + // Returns head's position with flags unioned from all cursors at the same position, + // since multiple sources may each contribute flags (e.g. MAY_HAVE_CONTENT_BIT) that + // the head alone would not reflect. + currentPosition = pos; + applyToSelectedElementsInHeap((self, cursor, index) -> { + currentPosition |= cursor.encodedPosition(); + }, 0); + currentPosition = Cursor.unionFlags(pos, currentPosition, Cursor.FLAGS_MASK); + } + positionCollected = true; + } + return currentPosition; } @Override diff --git a/src/java/org/apache/cassandra/db/tries/Cursor.java b/src/java/org/apache/cassandra/db/tries/Cursor.java index a24eb07f69ce..a1ed19d5de40 100644 --- a/src/java/org/apache/cassandra/db/tries/Cursor.java +++ b/src/java/org/apache/cassandra/db/tries/Cursor.java @@ -137,7 +137,12 @@ interface Cursor /// Used for sets and ranges to correctly define the range states for branch-inclusive ranges. long ON_RETURN_PATH_BIT = 1L << 19; - /// Mask of the transition bits including the direction. We apply xor with this value to form a position in the + /// Mask for the bits used for flags that should not affect position comparison. + long FLAGS_MASK = 0xFFFFL; + + /// Flag indicating whether this position may have content. + long MAY_HAVE_CONTENT_BIT = 1L; + /// reverse direction. long TRANSITION_MASK = 0x8FFL << TRANSITION_SHIFT; @@ -198,9 +203,25 @@ static boolean isOnReturnPath(long encodedPosition) static long compare(long encoded1, long encoded2) { // This can support depth of 2^31 - 1 without overflowing. - return encoded1 - encoded2; + return (encoded1 & ~FLAGS_MASK) - (encoded2 & ~FLAGS_MASK); + } + + + /// Returns a new position with flags from pos2 combined using union operation. + /// Union: (pos1 | pos2) & flags | (pos1 & ~flags) + static long unionFlags(long pos1, long pos2, long flags) + { + return (pos1 | pos2) & flags | (pos1 & ~flags); } + /// Returns a new position with flags from pos2 combined using intersection operation. + /// Intersection: pos1 & pos2 & flags | (pos1 & ~flags) + static long intersectionFlags(long pos1, long pos2, long flags) + { + return pos1 & pos2 & flags | (pos1 & ~flags); + } + + static long rootPosition(Direction direction) { return direction.select(ROOT_POSITION_FORWARD, ROOT_POSITION_REVERSE); @@ -225,7 +246,7 @@ static long exhaustedPosition(long prevPosition) static boolean isRootPosition(long encodedPosition) { - return encodedPosition == ROOT_POSITION_FORWARD || encodedPosition == ROOT_POSITION_REVERSE; + return compare(encodedPosition, ROOT_POSITION_FORWARD) == 0 || compare(encodedPosition, ROOT_POSITION_REVERSE) == 0; } static long encode(int depth, int transition, Direction direction) @@ -252,7 +273,7 @@ static long positionForDescentWithByte(long encodedPosition, int incomingByte) /// returned encoded position is a valid `skipTo` position for the current state. static long positionForSkippingBranch(long encodedBranchPosition) { - return encodedBranchPosition + (1L << TRANSITION_SHIFT); + return (encodedBranchPosition & ~FLAGS_MASK) + (1L << TRANSITION_SHIFT); } /// Returns true if the given `currPosition` as returned by `advance`, `advanceMultiple` or `skipTo` is the result @@ -267,10 +288,11 @@ static boolean ascended(long currPosition, long prevPosition) static String toString(long encodedPosition) { - return String.format("depth %d incomingTransition %02x%s %s", + return String.format("depth %d incomingTransition %02x%s%s %s", depth(encodedPosition), incomingTransition(encodedPosition), isOnReturnPath(encodedPosition) ? "↑" : " ", + (encodedPosition & MAY_HAVE_CONTENT_BIT) != 0 ? "*" : " ", direction(encodedPosition)); } @@ -361,9 +383,12 @@ default T advanceToContent(ResettingTransitionsReceiver receiver) if (isOnReturnPath(currPosition)) receiver.onReturnPath(); } - T content = content(); - if (content != null) - return content; + if ((currPosition & MAY_HAVE_CONTENT_BIT) != 0) + { + T content = content(); + if (content != null) + return content; + } prevPosition = currPosition; } } @@ -406,7 +431,8 @@ default boolean descendAlong(ByteSource bytes) while (next != ByteSource.END_OF_STREAM) { long nextPosition = positionForDescentWithByte(position, next); - if (compare(skipTo(nextPosition), nextPosition) != 0) + long arrived = skipTo(nextPosition); + if (compare(arrived, nextPosition) != 0) return false; next = bytes.next(); position = nextPosition; @@ -463,7 +489,8 @@ interface Walker extends Cursor.ResettingTransitionsReceiver default R process(Cursor.Walker walker) { assertFresh(); - T content = content(); // handle content on the root node + long currentPosition = encodedPosition(); + T content = (currentPosition & MAY_HAVE_CONTENT_BIT) != 0 ? content() : null; if (content == null) content = advanceToContent(walker); @@ -487,7 +514,8 @@ default R process(Cursor.Walker walker) default R processSkippingBranches(Cursor.Walker walker) { assertFresh(); - T content = content(); // handle content on the root node + long currentPosition = encodedPosition(); + T content = (currentPosition & MAY_HAVE_CONTENT_BIT) != 0 ? content() : null; if (content != null) { walker.content(content); @@ -504,7 +532,7 @@ default R processSkippingBranches(Cursor.Walker walker) break; walker.resetPathLength(depth(current) - 1); walker.addPathByte(incomingTransition(current)); - content = content(); + content = (current & MAY_HAVE_CONTENT_BIT) != 0 ? content() : null; if (content == null) content = advanceToContent(walker); } @@ -541,7 +569,7 @@ public ByteComparable.Version byteComparableVersion() @Override public Cursor tailCursor(Direction direction) { - assert position == Cursor.rootPosition(direction) : "tailTrie called on exhausted cursor"; + assert compare(position, Cursor.rootPosition(direction)) == 0 : "tailTrie called on exhausted cursor"; return new Empty<>(direction, byteComparableVersion); } diff --git a/src/java/org/apache/cassandra/db/tries/DeletionAwareCursor.java b/src/java/org/apache/cassandra/db/tries/DeletionAwareCursor.java index 682f4cf05ac3..8fc573209017 100644 --- a/src/java/org/apache/cassandra/db/tries/DeletionAwareCursor.java +++ b/src/java/org/apache/cassandra/db/tries/DeletionAwareCursor.java @@ -87,15 +87,21 @@ default R process(DeletionAwareWalker walker) while (true) { - T content = content(); // handle content on the root node - if (content != null) - walker.content(content); + // Always check for deletion branches as they are independent of content RangeCursor deletionBranch = deletionBranchCursor(direction()); if (deletionBranch != null && walker.enterDeletionsBranch()) { processDeletionBranch(walker, deletionBranch); walker.exitDeletionsBranch(); } + + // MAY_HAVE_CONTENT_BIT optimization: only call content() if flag indicates potential content + if ((currentPosition & MAY_HAVE_CONTENT_BIT) != 0) + { + T content = content(); + if (content != null) + walker.content(content); + } long prevPosition = currentPosition; currentPosition = advanceMultiple(walker); @@ -113,7 +119,7 @@ default R process(DeletionAwareWalker walker) private static void processDeletionBranch(DeletionAwareWalker walker, Cursor cursor) { cursor.assertFresh(); - D content = cursor.content(); // handle content on the root node + D content = (cursor.encodedPosition() & MAY_HAVE_CONTENT_BIT) != 0 ? cursor.content() : null; if (content == null) content = cursor.advanceToContent(walker); diff --git a/src/java/org/apache/cassandra/db/tries/DeletionAwareMergeSource.java b/src/java/org/apache/cassandra/db/tries/DeletionAwareMergeSource.java index cc22b2716bf5..10dac17e1a1a 100644 --- a/src/java/org/apache/cassandra/db/tries/DeletionAwareMergeSource.java +++ b/src/java/org/apache/cassandra/db/tries/DeletionAwareMergeSource.java @@ -131,7 +131,7 @@ private long skipDeletionsToDataPosition(long dataPosition) if (Cursor.isExhausted(deletionsPositionUncorrected)) return leaveDeletionsBranch(dataPosition); else - return setAtDeletionsAndReturnPosition(deletionsPositionUncorrected == deletionsSkipPosition, + return setAtDeletionsAndReturnPosition(Cursor.compare(deletionsPositionUncorrected, deletionsSkipPosition) == 0, dataPosition); } diff --git a/src/java/org/apache/cassandra/db/tries/DepthAdjustedCursor.java b/src/java/org/apache/cassandra/db/tries/DepthAdjustedCursor.java index 762dcc717cf5..505edce0aadb 100644 --- a/src/java/org/apache/cassandra/db/tries/DepthAdjustedCursor.java +++ b/src/java/org/apache/cassandra/db/tries/DepthAdjustedCursor.java @@ -1,164 +1,163 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.cassandra.db.tries; - -import org.apache.cassandra.utils.bytecomparable.ByteComparable; -import org.apache.cassandra.utils.bytecomparable.ByteSource; - -class DepthAdjustedCursor> implements Cursor -{ - final C source; - private long depthAdjustment; - private long matchingPositionAtRoot; - - DepthAdjustedCursor(C source, long matchingPositionAtRoot) - { - this.source = source; - setAttachmentPoint(matchingPositionAtRoot); - } - - void setAttachmentPoint(long matchingPositionAtRoot) - { - this.matchingPositionAtRoot = matchingPositionAtRoot; - this.depthAdjustment = Cursor.depthCorrectionValue(matchingPositionAtRoot); - } - - long toAdjustedDepth(long position) - { - if (Cursor.depth(position) > 0) - return position + depthAdjustment; - else if (Cursor.isExhausted(position)) - return position; - else - return matchingPositionAtRoot | (position & Cursor.ON_RETURN_PATH_BIT); - } - - long fromAdjustedDepth(long position) - { - // matchingPositionAtRoot | ON_RETURN_PATH_BIT should map to rootPosition | ON_RETURN_PATH_BIT - long adjusted = position - depthAdjustment; - if (Cursor.depth(adjusted) > 0) - return adjusted; - - // The only non-exhausted position that can be requested with this depth is the return path stop for the root. - if (position == (matchingPositionAtRoot | Cursor.ON_RETURN_PATH_BIT)) - return Cursor.rootReturnPosition(adjusted); - else - return Cursor.exhaustedPosition(adjusted); - - } - - @Override - public long encodedPosition() - { - return toAdjustedDepth(source.encodedPosition()); - } - - @Override - public T content() - { - return source.content(); - } - - @Override - public ByteComparable.Version byteComparableVersion() - { - return source.byteComparableVersion(); - } - - @Override - public long advance() - { - return toAdjustedDepth(source.advance()); - } - - @Override - public long advanceMultiple(TransitionsReceiver receiver) - { - return toAdjustedDepth(source.advanceMultiple(receiver)); - } - - @Override - public long skipTo(long encodedSkipPosition) - { - return toAdjustedDepth(source.skipTo(fromAdjustedDepth(encodedSkipPosition))); - } - - @Override - public boolean descendAlong(ByteSource bytes) - { - return source.descendAlong(bytes); - } - - @Override - public Cursor tailCursor(Direction direction) - { - return source.tailCursor(direction); - } - - static Cursor make(Cursor source, long matchingPositionAtRoot) - { - return Cursor.depth(matchingPositionAtRoot) == 0 ? source : new Plain<>(source, matchingPositionAtRoot); - } - - static > RangeCursor make(RangeCursor source, long matchingPositionAtRoot) - { - return Cursor.depth(matchingPositionAtRoot) == 0 ? source : new Range<>(source, matchingPositionAtRoot); - } - - static class Plain extends DepthAdjustedCursor> - { - public Plain(Cursor source, long matchingPositionAtRoot) - { - super(source, matchingPositionAtRoot); - } - } - - static class Range> extends DepthAdjustedCursor> implements RangeCursor - { - Range(RangeCursor source, long matchingPositionAtRoot) - { - super(source, matchingPositionAtRoot); - } - - @Override - public S state() - { - return source.state(); - } - - @Override - public S precedingState() - { - return source.precedingState(); - } - - @Override - public RangeCursor precedingStateCursor(Direction direction) - { - return source.precedingStateCursor(direction); - } - - @Override - public RangeCursor tailCursor(Direction direction) - { - return source.tailCursor(direction); - } - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.db.tries; + +import org.apache.cassandra.utils.bytecomparable.ByteComparable; +import org.apache.cassandra.utils.bytecomparable.ByteSource; + +class DepthAdjustedCursor> implements Cursor +{ + final C source; + private long depthAdjustment; + private long matchingPositionAtRoot; + + DepthAdjustedCursor(C source, long matchingPositionAtRoot) + { + this.source = source; + setAttachmentPoint(matchingPositionAtRoot); + } + + void setAttachmentPoint(long matchingPositionAtRoot) + { + this.matchingPositionAtRoot = matchingPositionAtRoot; + this.depthAdjustment = Cursor.depthCorrectionValue(matchingPositionAtRoot); + } + + long toAdjustedDepth(long position) + { + if (Cursor.depth(position) > 0) + return position + depthAdjustment; + else if (Cursor.isExhausted(position)) + return position; + else + return matchingPositionAtRoot | (position & Cursor.ON_RETURN_PATH_BIT); + } + + long fromAdjustedDepth(long position) + { + // matchingPositionAtRoot | ON_RETURN_PATH_BIT should map to rootPosition | ON_RETURN_PATH_BIT + long adjusted = position - depthAdjustment; + if (Cursor.depth(adjusted) > 0) + return adjusted; + + // The only non-exhausted position that can be requested with this depth is the return path stop for the root. + if (position == (matchingPositionAtRoot | Cursor.ON_RETURN_PATH_BIT)) + return Cursor.rootReturnPosition(adjusted); + else + return Cursor.exhaustedPosition(adjusted); + } + + @Override + public long encodedPosition() + { + return toAdjustedDepth(source.encodedPosition()); + } + + @Override + public T content() + { + return source.content(); + } + + @Override + public ByteComparable.Version byteComparableVersion() + { + return source.byteComparableVersion(); + } + + @Override + public long advance() + { + return toAdjustedDepth(source.advance()); + } + + @Override + public long advanceMultiple(TransitionsReceiver receiver) + { + return toAdjustedDepth(source.advanceMultiple(receiver)); + } + + @Override + public long skipTo(long encodedSkipPosition) + { + return toAdjustedDepth(source.skipTo(fromAdjustedDepth(encodedSkipPosition))); + } + + @Override + public boolean descendAlong(ByteSource bytes) + { + return source.descendAlong(bytes); + } + + @Override + public Cursor tailCursor(Direction direction) + { + return source.tailCursor(direction); + } + + static Cursor make(Cursor source, long matchingPositionAtRoot) + { + return Cursor.depth(matchingPositionAtRoot) == 0 ? source : new Plain<>(source, matchingPositionAtRoot); + } + + static > RangeCursor make(RangeCursor source, long matchingPositionAtRoot) + { + return Cursor.depth(matchingPositionAtRoot) == 0 ? source : new Range<>(source, matchingPositionAtRoot); + } + + static class Plain extends DepthAdjustedCursor> + { + public Plain(Cursor source, long matchingPositionAtRoot) + { + super(source, matchingPositionAtRoot); + } + } + + static class Range> extends DepthAdjustedCursor> implements RangeCursor + { + Range(RangeCursor source, long matchingPositionAtRoot) + { + super(source, matchingPositionAtRoot); + } + + @Override + public S state() + { + return source.state(); + } + + @Override + public S precedingState() + { + return source.precedingState(); + } + + @Override + public RangeCursor precedingStateCursor(Direction direction) + { + return source.precedingStateCursor(direction); + } + + @Override + public RangeCursor tailCursor(Direction direction) + { + return source.tailCursor(direction); + } + } +} \ No newline at end of file diff --git a/src/java/org/apache/cassandra/db/tries/FlexibleMergeCursor.java b/src/java/org/apache/cassandra/db/tries/FlexibleMergeCursor.java index 5a41c4738821..cb58f5445b09 100644 --- a/src/java/org/apache/cassandra/db/tries/FlexibleMergeCursor.java +++ b/src/java/org/apache/cassandra/db/tries/FlexibleMergeCursor.java @@ -53,12 +53,15 @@ enum State FlexibleMergeCursor(C c1, D c2) { c1.assertFresh(); - c2.assertFresh(); + if (c2 != null) + c2.assertFresh(); this.c1 = c1; this.c2 = c2; this.c2depthCorrection = 0; state = c2 != null ? State.AT_BOTH : State.C1_ONLY; currentPosition = c1.encodedPosition(); + if (c2 != null) + currentPosition = Cursor.unionFlags(currentPosition, c2.encodedPosition(), Cursor.FLAGS_MASK); // We can't call postAdvance here because the class may not be completely initialized. // The concrete class should do that instead } @@ -70,6 +73,7 @@ public void addCursor(D c2) this.c2 = c2; this.c2depthCorrection = Cursor.depthCorrectionValue(currentPosition); this.state = State.AT_BOTH; + currentPosition = Cursor.unionFlags(currentPosition, c2.encodedPosition(), Cursor.FLAGS_MASK); } abstract long postAdvance(long depth); @@ -174,7 +178,8 @@ private long checkOrder(long c1pos, long c2posUncorrected) } // c1pos == c2pos state = State.AT_BOTH; - return postAdvance(currentPosition = c1pos); + currentPosition = Cursor.unionFlags(c1pos, c2pos, Cursor.FLAGS_MASK); + return postAdvance(currentPosition); } private long leaveC2(long c1pos) diff --git a/src/java/org/apache/cassandra/db/tries/InMemoryRangeTrie.java b/src/java/org/apache/cassandra/db/tries/InMemoryRangeTrie.java index 8c25e35209e2..5cf87e8aa38e 100644 --- a/src/java/org/apache/cassandra/db/tries/InMemoryRangeTrie.java +++ b/src/java/org/apache/cassandra/db/tries/InMemoryRangeTrie.java @@ -137,6 +137,8 @@ long updateActiveAndReturn(long position) { if (!Cursor.isExhausted(position)) { + currentPosition = position | MAY_HAVE_CONTENT_BIT; + // Always check if we are seeing new content; if we do, that's an easy state update. S content = content(); if (content != null) @@ -271,11 +273,11 @@ static class InMemoryRangeBranchCursor> extends InMemory InMemoryRangeBranchCursor(InMemoryReadTrie trie, Direction direction, int root, S rootDescentContent, S rootAscentContent) { super(trie, direction, root); - content = rootDescentContent; this.rootAscentContent = rootAscentContent; if (rootAscentContent != null) addBacktrack(NONE, 0, -1); - updateActiveAndReturn(encodedPosition()); + setNodeState(currentPosition, rootDescentContent, currentFullNode, currentNode); + updateActiveAndReturn(currentPosition); } @Override @@ -299,7 +301,7 @@ long advanceToNextChildWithTarget(int node, int data, int transition) long presentAscentPathContent() { - return setNodeState(Cursor.encode(++depth, 0, direction) | ON_RETURN_PATH_BIT, + return setNodeState(Cursor.unionFlags(Cursor.encode(++depth, 0, direction), MAY_HAVE_CONTENT_BIT, MAY_HAVE_CONTENT_BIT) | ON_RETURN_PATH_BIT, rootAscentContent, NONE, NONE); diff --git a/src/java/org/apache/cassandra/db/tries/InMemoryReadTrie.java b/src/java/org/apache/cassandra/db/tries/InMemoryReadTrie.java index 11bd2a44c837..7b7bde445bd8 100644 --- a/src/java/org/apache/cassandra/db/tries/InMemoryReadTrie.java +++ b/src/java/org/apache/cassandra/db/tries/InMemoryReadTrie.java @@ -686,7 +686,7 @@ static class InMemoryCursor extends CursorBacktrackingState implements Cursor final InMemoryReadTrie trie; int currentNode; int currentFullNode; - private long currentPosition; + protected long currentPosition; protected int depth; protected T content; final Direction direction; @@ -1254,6 +1254,9 @@ else if (offset(node) == PREFIX_OFFSET) content = null; currentNode = node; } + + if (content != null || isLeaf(node)) + currentPosition |= MAY_HAVE_CONTENT_BIT; } /// Get the content from a prefix node and/or put a backtracking entry for return path data. @@ -1294,23 +1297,27 @@ protected boolean shouldPresentOnTheReturnPath(int node) long descendInto(int child, int transition) { ++depth; - currentPosition = Cursor.encode(depth, transition, direction); + long pos = Cursor.encode(depth, transition, direction); + currentPosition = pos; setCurrentNodeAndApplyPrefixes(child, depth, transition, false); return currentPosition; } long descendIntoChain(int child, int transition) { - return setNodeState(Cursor.encode(++depth, transition, direction), null, child, child); + long pos = Cursor.encode(++depth, transition, direction); + return setNodeState(pos, null, child, child); } long setNodeState(long nextPosition, T nodeContent, int fullNode, int node) { currentPosition = nextPosition; + if (nodeContent != null || isLeaf(fullNode)) + currentPosition |= MAY_HAVE_CONTENT_BIT; content = nodeContent; currentFullNode = fullNode; currentNode = node; - return nextPosition; + return currentPosition; } } diff --git a/src/java/org/apache/cassandra/db/tries/IntersectionCursor.java b/src/java/org/apache/cassandra/db/tries/IntersectionCursor.java index 0b9b8b9c4efd..e178bf410da1 100644 --- a/src/java/org/apache/cassandra/db/tries/IntersectionCursor.java +++ b/src/java/org/apache/cassandra/db/tries/IntersectionCursor.java @@ -201,6 +201,15 @@ public T content() throw new AssertionError(); } } + + @Override + public long encodedPosition() + { + long pos = super.encodedPosition(); + if (state == State.MATCHING && !set.state().applicableAfter) + pos &= ~Cursor.MAY_HAVE_CONTENT_BIT; + return pos; + } } /// Intersection cursor for [Trie]. diff --git a/src/java/org/apache/cassandra/db/tries/MergeCursor.java b/src/java/org/apache/cassandra/db/tries/MergeCursor.java index e231b8ba0390..9300b7f5a89c 100644 --- a/src/java/org/apache/cassandra/db/tries/MergeCursor.java +++ b/src/java/org/apache/cassandra/db/tries/MergeCursor.java @@ -37,6 +37,7 @@ abstract class MergeCursor, U, D extends Cursor, R> im boolean atC1; boolean atC2; + long currentPosition; MergeCursor(C c1, D c2) { @@ -45,6 +46,7 @@ abstract class MergeCursor, U, D extends Cursor, R> im this.c1 = c1; this.c2 = c2; atC1 = atC2 = true; + currentPosition = Cursor.unionFlags(c1.encodedPosition(), c2.encodedPosition(), Cursor.FLAGS_MASK); } @Override @@ -83,13 +85,16 @@ long checkOrder(long c1pos, long c2pos) long cmp = Cursor.compare(c1pos, c2pos); atC1 = cmp <= 0; atC2 = cmp >= 0; - return atC1 ? c1pos : c2pos; + if (atC1 && atC2) + return currentPosition = Cursor.unionFlags(c1pos, c2pos, Cursor.FLAGS_MASK); + else + return currentPosition = atC1 ? c1pos : c2pos; } @Override public long encodedPosition() { - return atC1 ? c1.encodedPosition() : c2.encodedPosition(); + return currentPosition; } @Override diff --git a/src/java/org/apache/cassandra/db/tries/PrefixedCursor.java b/src/java/org/apache/cassandra/db/tries/PrefixedCursor.java index bf54001bc34c..3f5fb1bea67b 100644 --- a/src/java/org/apache/cassandra/db/tries/PrefixedCursor.java +++ b/src/java/org/apache/cassandra/db/tries/PrefixedCursor.java @@ -116,7 +116,10 @@ public long skipTo(long encodedSkipPosition) private long setPositionAndCheckPrefixDone(long position) { if (nextPrefixByte == ByteSource.END_OF_STREAM) + { setAttachmentPoint(position); + position = toAdjustedDepth(source.encodedPosition()); + } currentPosition = position; return position; @@ -266,7 +269,8 @@ static class DeletionAwareSeparately> @Override public RangeCursor deletionBranchCursor(Direction direction) { - return Cursor.isRootPosition(encodedPosition()) && deletionBranch != null + long pos = encodedPosition(); + return (Cursor.isRootPosition(pos) || Cursor.compare(pos, matchingPositionAtRoot) == 0) && deletionBranch != null ? deletionBranch.tailCursor(direction) : null; } @@ -274,7 +278,8 @@ public RangeCursor deletionBranchCursor(Direction direction) @Override public DeletionAwareCursor tailCursor(Direction direction) { - if (Cursor.isRootPosition(encodedPosition())) + long pos = encodedPosition(); + if (Cursor.isRootPosition(pos) || Cursor.compare(pos, matchingPositionAtRoot) == 0) return new DeletionAwareSeparately<>(this, direction); else return super.tailCursor(direction); diff --git a/src/java/org/apache/cassandra/db/tries/RangeApplyCursor.java b/src/java/org/apache/cassandra/db/tries/RangeApplyCursor.java index ea7612b7d7dd..51f4de351aee 100644 --- a/src/java/org/apache/cassandra/db/tries/RangeApplyCursor.java +++ b/src/java/org/apache/cassandra/db/tries/RangeApplyCursor.java @@ -105,7 +105,7 @@ long maybeSkipRange(long dataPosition) private long skipRangeToDataPosition(long dataPosition) { long rangePosition = range.skipTo(dataPosition); - return setAtRangeAndReturnPosition(rangePosition == dataPosition, + return setAtRangeAndReturnPosition(Cursor.compare(rangePosition, dataPosition) == 0, dataPosition); } diff --git a/src/java/org/apache/cassandra/db/tries/RangeIntersectionCursor.java b/src/java/org/apache/cassandra/db/tries/RangeIntersectionCursor.java index 00ff2733a20f..664312b6e238 100644 --- a/src/java/org/apache/cassandra/db/tries/RangeIntersectionCursor.java +++ b/src/java/org/apache/cassandra/db/tries/RangeIntersectionCursor.java @@ -40,7 +40,7 @@ public RangeIntersectionCursor(RangeCursor src, TrieSetCursor set) this.set = set; this.src = src; assert Cursor.compare(src.encodedPosition(), set.encodedPosition()) == 0; - matchingPosition(set.encodedPosition()); + matchingPosition(src.encodedPosition()); } @Override @@ -168,7 +168,7 @@ private long advanceWithSourceAhead(long setPosition) if (cmp < 0) return coveredAreaWithSourceAhead(setPosition); if (cmp == 0) - return matchingPosition(setPosition); + return matchingPosition(sourcePosition); // Advancing cursor moved beyond the ahead cursor. Check if roles have reversed. if (set.precedingIncluded()) @@ -184,7 +184,7 @@ private long advanceSourceToIntersection(long setPosition) // Set is ahead of source, but outside the covered area. Skip source to set's position. long sourcePosition = src.skipTo(setPosition); if (Cursor.compare(sourcePosition, setPosition) == 0) - return matchingPosition(setPosition); + return matchingPosition(sourcePosition); if (src.precedingState() != null) return coveredAreaWithSourceAhead(setPosition); @@ -211,7 +211,7 @@ private long advanceSetToIntersection(long sourcePosition) // Set is ahead of source, but outside the covered area. Skip source to set's position. sourcePosition = src.skipTo(setPosition); if (Cursor.compare(setPosition, sourcePosition) == 0) - return matchingPosition(setPosition); + return matchingPosition(sourcePosition); if (src.precedingState() != null) return coveredAreaWithSourceAhead(setPosition); } @@ -245,9 +245,12 @@ private S restrict(S srcState, TrieSetCursor.RangeState setState) private long setState(State state, long position, S cursorState) { this.state = state; - this.currentPosition = position; + if (cursorState != null && cursorState.isBoundary()) + this.currentPosition = position | MAY_HAVE_CONTENT_BIT; + else + this.currentPosition = position & ~MAY_HAVE_CONTENT_BIT; this.currentState = cursorState; - return position; + return this.currentPosition; } @Override diff --git a/src/java/org/apache/cassandra/db/tries/RangesCursor.java b/src/java/org/apache/cassandra/db/tries/RangesCursor.java index 005b3ccdfb43..2b3a1d9edc0d 100644 --- a/src/java/org/apache/cassandra/db/tries/RangesCursor.java +++ b/src/java/org/apache/cassandra/db/tries/RangesCursor.java @@ -166,8 +166,11 @@ private RangesCursor(ByteComparable.Version byteComparableVersion, this.sources = sources; this.currentIdx = startIdx; this.endIdx = endIdxExclusive; - this.currentPosition = currentPosition; this.currentState = currentState; + if (currentState.isBoundary()) + this.currentPosition = currentPosition | MAY_HAVE_CONTENT_BIT; + else + this.currentPosition = currentPosition & ~MAY_HAVE_CONTENT_BIT; this.endsAfterMask = endsAfterMask; } @@ -238,8 +241,11 @@ private long advanceBoundariesAndSelectState(long nextPosition) containedSelection |= direction.select(RangeState.APPLICABLE_AFTER, RangeState.APPLICABLE_BEFORE); currentState = RangeState.values()[containedSelection]; - currentPosition = nextPosition; - return nextPosition; + if (currentState.isBoundary()) + currentPosition = nextPosition | MAY_HAVE_CONTENT_BIT; + else + currentPosition = nextPosition & ~MAY_HAVE_CONTENT_BIT; + return currentPosition; } private long maybeOnReturnPath(long nextPosition, int index, Direction direction) diff --git a/src/java/org/apache/cassandra/db/tries/SingletonCursor.java b/src/java/org/apache/cassandra/db/tries/SingletonCursor.java index f7641b0f6858..5be9c8779d10 100644 --- a/src/java/org/apache/cassandra/db/tries/SingletonCursor.java +++ b/src/java/org/apache/cassandra/db/tries/SingletonCursor.java @@ -83,7 +83,7 @@ public long advance() currentPosition = nextPosition; if (!Cursor.isExhausted(nextPosition)) prepareNextPosition(currentPosition); - return currentPosition; + return encodedPosition(); } @Override @@ -106,7 +106,7 @@ public long advanceMultiple(TransitionsReceiver receiver) } currentPosition = Cursor.positionForDescentWithByte(pos, current); nextPosition = Cursor.exhaustedPosition(currentPosition); - return currentPosition; + return encodedPosition(); } @Override @@ -132,7 +132,7 @@ public T content() @Override public long encodedPosition() { - return currentPosition; + return atEnd() ? currentPosition | MAY_HAVE_CONTENT_BIT : currentPosition; } @Override diff --git a/src/java/org/apache/cassandra/db/tries/SingletonOrderedCursor.java b/src/java/org/apache/cassandra/db/tries/SingletonOrderedCursor.java index 28b36ec341c1..6be3d79ac6a8 100644 --- a/src/java/org/apache/cassandra/db/tries/SingletonOrderedCursor.java +++ b/src/java/org/apache/cassandra/db/tries/SingletonOrderedCursor.java @@ -92,7 +92,7 @@ public long advanceMultiple(TransitionsReceiver receiver) super.advanceMultiple(receiver); if (presentOnReturnPath) currentPosition |= ON_RETURN_PATH_BIT; - return currentPosition; + return encodedPosition(); } @Override diff --git a/src/java/org/apache/cassandra/db/tries/TrieSetCursor.java b/src/java/org/apache/cassandra/db/tries/TrieSetCursor.java index fc97295e8057..50366d6050e9 100644 --- a/src/java/org/apache/cassandra/db/tries/TrieSetCursor.java +++ b/src/java/org/apache/cassandra/db/tries/TrieSetCursor.java @@ -217,6 +217,7 @@ default TrieSetCursor negated() class Negated implements TrieSetCursor { final TrieSetCursor source; + final Direction direction; enum Overriding { @@ -227,9 +228,16 @@ enum Overriding Negated(TrieSetCursor source) { this.source = source; + this.direction = source.direction(); overriding = Overriding.ROOT; } + @Override + public Direction direction() + { + return direction; + } + @Override public long encodedPosition() { @@ -237,12 +245,19 @@ public long encodedPosition() switch (overriding) { case ROOT_RETURN: - return Cursor.rootReturnPosition(encodedPosition); + return Cursor.rootReturnPosition(encodedPosition) | MAY_HAVE_CONTENT_BIT; case EXHAUSTED: return Cursor.exhaustedPosition(encodedPosition); case ROOT: case NONE: default: + if (!Cursor.isExhausted(encodedPosition)) + { + if (state().isBoundary()) + encodedPosition |= MAY_HAVE_CONTENT_BIT; + else + encodedPosition &= ~MAY_HAVE_CONTENT_BIT; + } return encodedPosition; } } @@ -306,22 +321,22 @@ public long advance() { case ROOT_RETURN: overriding = Overriding.EXHAUSTED; - return encodedPosition(); + break; default: - return checkOverride(source.advance()); + checkOverride(source.advance()); + break; } + return encodedPosition(); } @Override public long skipTo(long encodedSkipPosition) { if (Cursor.isExhausted(encodedSkipPosition) || overriding == Overriding.ROOT_RETURN) - { overriding = Overriding.EXHAUSTED; - return encodedPosition(); - } else - return checkOverride(source.skipTo(encodedSkipPosition)); + checkOverride(source.skipTo(encodedSkipPosition)); + return encodedPosition(); } // Sets don't implement advanceMultiple as they are only meant to limit data tries. diff --git a/src/java/org/apache/cassandra/db/tries/VerificationCursor.java b/src/java/org/apache/cassandra/db/tries/VerificationCursor.java index c4fc85426b5a..b89e8ca6340a 100644 --- a/src/java/org/apache/cassandra/db/tries/VerificationCursor.java +++ b/src/java/org/apache/cassandra/db/tries/VerificationCursor.java @@ -73,29 +73,31 @@ class Plain> implements Cursor, Cursor.TransitionsRece { this.direction = cursor.direction(); this.source = cursor; - this.returnedPosition = Cursor.rootPosition(direction); + this.returnedPosition = source.encodedPosition(); this.path = new byte[16]; - long reportedPosition = source.encodedPosition(); - assert Cursor.direction(reportedPosition) == direction : + assert Cursor.direction(returnedPosition) == direction : String.format("Invalid direction bit %d in root position %s (%016x)\n%s", - (reportedPosition >> DIRECTION_BIT) & 1, - Cursor.toString(reportedPosition), - reportedPosition, + (returnedPosition >> DIRECTION_BIT) & 1, + Cursor.toString(returnedPosition), + returnedPosition, this); - assert Cursor.compare(reportedPosition, returnedPosition) == 0 : + assert Cursor.compare(returnedPosition, Cursor.rootPosition(direction)) == 0 : String.format("Invalid initial position %s (must be %s)\n%s", - Cursor.toString(reportedPosition), Cursor.toString(returnedPosition), + Cursor.toString(Cursor.rootPosition(direction)), this); } @Override public long encodedPosition() { - assert Cursor.compare(source.encodedPosition(), returnedPosition) == 0 : - String.format("Position changed without advance: %s -> %s\n%s", + long reportedPosition = source.encodedPosition(); + assert Cursor.compare(reportedPosition, returnedPosition) == 0 : + String.format("Position changed without advance: %s -> %s (reported bits %016x vs expected %016x)\n%s", Cursor.toString(returnedPosition), - Cursor.toString(source.encodedPosition()), + Cursor.toString(reportedPosition), + reportedPosition, + returnedPosition, this); return returnedPosition; } @@ -107,7 +109,13 @@ public T content() String.format("Cannot query content on exhausted cursor.\n%s", this); - return source.content(); + T content = source.content(); + if (content != null) + assert (returnedPosition & MAY_HAVE_CONTENT_BIT) != 0 : + String.format("Non-null content for position without MAY_HAVE_CONTENT_BIT: %s\n%s", + Cursor.toString(returnedPosition), + this); + return content; } @Override diff --git a/test/unit/org/apache/cassandra/db/tries/RangesTrieSetTest.java b/test/unit/org/apache/cassandra/db/tries/RangesTrieSetTest.java index 68f702db7780..51c173944e58 100644 --- a/test/unit/org/apache/cassandra/db/tries/RangesTrieSetTest.java +++ b/test/unit/org/apache/cassandra/db/tries/RangesTrieSetTest.java @@ -109,21 +109,28 @@ public TrieSetCursor.RangeState state() return cursor.state(); } + private long applyBit(long pos) + { + if (!Cursor.isExhausted(pos) && content() != null) + pos |= Cursor.MAY_HAVE_CONTENT_BIT; + return pos; + } + public long encodedPosition() { - return cursor.encodedPosition(); + return applyBit(cursor.encodedPosition()); } @Override public long advance() { - return cursor.advance(); + return applyBit(cursor.advance()); } @Override public long skipTo(long encodedSkipPosition) { - return cursor.skipTo(encodedSkipPosition); + return applyBit(cursor.skipTo(encodedSkipPosition)); } @Override @@ -755,10 +762,17 @@ public TrieSetCursor tailCursor(Direction direction) return new TrieSetOverRangeCursor(source.tailCursor(direction)); } + private long applyBit(long pos) + { + if (!Cursor.isExhausted(pos) && state() != null) + pos |= Cursor.MAY_HAVE_CONTENT_BIT; + return pos; + } + @Override public long encodedPosition() { - return source.encodedPosition(); + return applyBit(source.encodedPosition()); } @Override @@ -770,13 +784,13 @@ public ByteComparable.Version byteComparableVersion() @Override public long advance() { - return source.advance(); + return applyBit(source.advance()); } @Override public long skipTo(long encodedSkipPosition) { - return source.skipTo(encodedSkipPosition); + return applyBit(source.skipTo(encodedSkipPosition)); } } } diff --git a/test/unit/org/apache/cassandra/db/tries/TrieUtil.java b/test/unit/org/apache/cassandra/db/tries/TrieUtil.java index 56066fcd1024..48ff0b28f388 100644 --- a/test/unit/org/apache/cassandra/db/tries/TrieUtil.java +++ b/test/unit/org/apache/cassandra/db/tries/TrieUtil.java @@ -552,14 +552,20 @@ int exhausted() @Override public long encodedPosition() { + long pos; if (current == direction.select(-1, childs)) - return Cursor.rootPosition(direction); + pos = Cursor.rootPosition(direction); else if (presentContentOnReturnPath && current == direction.select(childs, -1)) - return Cursor.rootPosition(direction) | Cursor.ON_RETURN_PATH_BIT; - else if (direction.inLoop(current, 0, childs - 1)) - return Cursor.encode(1, current, direction) | + pos = Cursor.rootPosition(direction) | Cursor.ON_RETURN_PATH_BIT; + else if (current >= 0 && current < childs) + pos = Cursor.encode(1, current, direction) | (presentContentOnReturnPath ? Cursor.ON_RETURN_PATH_BIT : 0); - return Cursor.exhaustedPosition(direction); + else + return Cursor.exhaustedPosition(direction); + + if (presentContentOnReturnPath == Cursor.isOnReturnPath(pos)) + pos |= Cursor.MAY_HAVE_CONTENT_BIT; + return pos; } @Override @@ -567,7 +573,11 @@ public Integer content() { if (presentContentOnReturnPath != Cursor.isOnReturnPath(encodedPosition())) return null; - return current == childs ? -1 : current; + if (current >= 0 && current < childs) + return current; + if (current == direction.select(-1, childs) || current == direction.select(childs, -1)) + return -1; + return null; } @Override @@ -622,12 +632,29 @@ public static class CursorFromSpec implements Cursor SpecStackEntry stack; Direction direction; long position; + final long flagsMask; CursorFromSpec(Object[] spec, Direction direction) + { + this(spec, direction, 0L); + } + + CursorFromSpec(Object[] spec, Direction direction, long flagsMask) { this.direction = direction; + this.flagsMask = flagsMask; stack = makeSpecStackEntry(direction, spec, null); - position = Cursor.rootPosition(direction); + position = applyFlags(Cursor.rootPosition(direction)); + } + + private long applyFlags(long pos) + { + if (Cursor.isExhausted(pos)) + return pos; + pos |= flagsMask; + if (content() != null) + pos |= Cursor.MAY_HAVE_CONTENT_BIT; + return pos; } @Override @@ -656,7 +683,8 @@ public long advance() while (child == null); stack = makeSpecStackEntry(direction, child, current); - return position = encode(++depth); + position = applyFlags(encode(++depth)); + return position; } @Override From b04f9055cafb0db06c798cf14ce22a0c6afba24a Mon Sep 17 00:00:00 2001 From: lesnik2u Date: Wed, 18 Feb 2026 16:36:59 +0100 Subject: [PATCH 2/9] CNDB-15669: Add MetadataFlagsTest --- .../cassandra/db/tries/MetadataFlagsTest.java | 399 ++++++++++++++++++ 1 file changed, 399 insertions(+) create mode 100644 test/unit/org/apache/cassandra/db/tries/MetadataFlagsTest.java diff --git a/test/unit/org/apache/cassandra/db/tries/MetadataFlagsTest.java b/test/unit/org/apache/cassandra/db/tries/MetadataFlagsTest.java new file mode 100644 index 000000000000..de74a09e6dae --- /dev/null +++ b/test/unit/org/apache/cassandra/db/tries/MetadataFlagsTest.java @@ -0,0 +1,399 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.cassandra.db.tries; + +import java.util.Arrays; +import java.util.List; + +import org.junit.BeforeClass; +import org.junit.Test; + +import org.apache.cassandra.config.CassandraRelevantProperties; +import org.apache.cassandra.utils.bytecomparable.ByteComparable; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class MetadataFlagsTest +{ + private static final long FLAG1 = 0x0002L; + private static final long FLAG2 = 0x0004L; + private static final long FLAG3 = 0x0008L; + + @BeforeClass + public static void enableVerification() + { + CassandraRelevantProperties.TRIE_DEBUG.setBoolean(true); + } + + @Test + public void testCompareIgnoresFlags() + { + + long pos = Cursor.encode(5, 0x41, Direction.FORWARD); + long posWithFlag1 = pos | FLAG1; + long posWithFlag2 = pos | FLAG2; + + assertEquals("Compare should ignore flags", 0, Cursor.compare(posWithFlag1, posWithFlag2)); + assertEquals("Compare should ignore flags (reverse)", 0, Cursor.compare(posWithFlag2, posWithFlag1)); + assertEquals("Compare with root should ignore flags", 0, Cursor.compare(Cursor.rootPosition(Direction.FORWARD) | FLAG1, Cursor.rootPosition(Direction.FORWARD))); + } + + @Test + public void testMergeCursorUnionsFlags() + { + + long pos = Cursor.rootPosition(Direction.FORWARD); + Cursor c1 = new MockCursor<>(pos | FLAG1); + Cursor c2 = new MockCursor<>(pos | FLAG2); + + MergeCursor> merge = new MergeCursor.Plain<>(Trie.throwingResolver(), c1, c2); + + long expected = pos | FLAG1 | FLAG2; + assertEquals("MergeCursor should union flags from both sources", expected, merge.encodedPosition()); + } + + @Test + public void testCollectionMergeCursorUnionsFlags() + { + + long pos = Cursor.rootPosition(Direction.FORWARD); + List> inputs = Arrays.asList( + new MockCursor<>(pos | FLAG1), + new MockCursor<>(pos | FLAG2), + new MockCursor<>(pos | FLAG3) + ); + + CollectionMergeCursor> merge = new CollectionMergeCursor.Plain<>( + Trie.throwingResolver(), Direction.FORWARD, inputs, Cursor::tailCursor + ); + + long expected = pos | FLAG1 | FLAG2 | FLAG3; + assertEquals("CollectionMergeCursor should union flags from all sources", expected, merge.encodedPosition()); + } + + @Test + public void testIntersectionCursorPreservesFlags() + { + + long pos = Cursor.rootPosition(Direction.FORWARD); + Cursor source = new MockCursor<>(pos | FLAG1); + TrieSetCursor set = new MockTrieSetCursor(pos | FLAG2, TrieSetCursor.RangeState.CONTAINED); + + IntersectionCursor> intersect = new IntersectionCursor.Plain<>(source, set); + + // IntersectionCursor.encodedPosition returns source.encodedPosition() + assertEquals("IntersectionCursor should preserve source flags", pos | FLAG1, intersect.encodedPosition()); + } + + @Test + public void testIntersectionCursorContentBit() + { + + long posWithContent = Cursor.rootPosition(Direction.FORWARD) | FLAG1 | Cursor.MAY_HAVE_CONTENT_BIT; + + // Plain IntersectionCursor should PRESERVE content bit even if NOT_CONTAINED + Cursor source1 = new MockCursor<>(posWithContent); + TrieSetCursor set1 = new MockTrieSetCursor(Cursor.rootPosition(Direction.FORWARD), TrieSetCursor.RangeState.NOT_CONTAINED); + IntersectionCursor> intersect1 = new IntersectionCursor.Plain<>(source1, set1); + assertEquals("Plain IntersectionCursor should preserve content bit", + posWithContent, intersect1.encodedPosition()); + + // IntersectionCursor.PlainSlice should CLEAR content bit if NOT_CONTAINED + Cursor source2 = new MockCursor<>(posWithContent); + TrieSetCursor set2 = new MockTrieSetCursor(Cursor.rootPosition(Direction.FORWARD), TrieSetCursor.RangeState.NOT_CONTAINED); + IntersectionCursor> intersect2 = new IntersectionCursor.PlainSlice<>(source2, set2); + assertEquals("PlainSlice IntersectionCursor should clear content bit when NOT_CONTAINED", + posWithContent & ~Cursor.MAY_HAVE_CONTENT_BIT, intersect2.encodedPosition()); + } + + @Test + public void testNegatedCursorContentBit() + { + + long posWithContent = Cursor.encode(1, 0x41, Direction.FORWARD) | FLAG1 | Cursor.MAY_HAVE_CONTENT_BIT; + long posWithoutContent = posWithContent & ~Cursor.MAY_HAVE_CONTENT_BIT; + + // Case 1: Source is NOT_CONTAINED -> negated should have content bit SET + TrieSetCursor source1 = new MockTrieSetCursor(posWithoutContent, TrieSetCursor.RangeState.NOT_CONTAINED); + TrieSetCursor negated1 = source1.negated(); + // Negated root logic: if overriding == NOT_CONTAINED, it sets the bit + assertEquals("Negated cursor should set content bit when source is NOT_CONTAINED", + posWithoutContent | Cursor.MAY_HAVE_CONTENT_BIT, negated1.encodedPosition()); + + // Case 2: Source is CONTAINED -> negated should have content bit CLEARED + TrieSetCursor source2 = new MockTrieSetCursor(posWithContent, TrieSetCursor.RangeState.CONTAINED); + TrieSetCursor negated2 = source2.negated(); + assertEquals("Negated cursor should clear content bit when source is CONTAINED", + posWithContent & ~Cursor.MAY_HAVE_CONTENT_BIT, negated2.encodedPosition()); + } + + @Test + public void testNegatedCursorPreservesFlags() + { + + // Let's test ROOT + long rootPos = Cursor.rootPosition(Direction.FORWARD); + TrieSetCursor sourceRoot = new MockTrieSetCursor(rootPos | FLAG1, TrieSetCursor.RangeState.NOT_CONTAINED); + TrieSetCursor negatedRoot = sourceRoot.negated(); + // Negated.encodedPosition for ROOT returns source.encodedPosition() | MAY_HAVE_CONTENT_BIT + assertEquals("Negated cursor should preserve flags at root", rootPos | FLAG1 | Cursor.MAY_HAVE_CONTENT_BIT, negatedRoot.encodedPosition()); + } + + @Test + public void testComplexMergeUnionsFlags() + { + + Object[] spec1 = new Object[] { "v1", null, "v2" }; // paths "0", "2" + Object[] spec2 = new Object[] { null, "v3", "v4" }; // paths "1", "2" + + Trie t1 = dir -> new TrieUtil.CursorFromSpec<>(spec1, dir, FLAG1); + Trie t2 = dir -> new TrieUtil.CursorFromSpec<>(spec2, dir, FLAG2); + + Trie merged = Trie.merge(Arrays.asList(t1, t2), new Trie.CollectionMergeResolver() { + @Override public String resolve(java.util.Collection contents) { return contents.iterator().next(); } + @Override public String resolve(String v1, String v2) { return v1; } + }); + + Cursor c = merged.cursor(Direction.FORWARD); + // Root + assertEquals("Root should have unioned flags", FLAG1 | FLAG2, c.encodedPosition() & Cursor.FLAGS_MASK); + + // Path "0" (only t1) + c.advance(); + assertEquals("Path 0 should have FLAG1 and content bit", FLAG1 | Cursor.MAY_HAVE_CONTENT_BIT, c.encodedPosition() & Cursor.FLAGS_MASK); + + // Path "1" (only t2) + c.advance(); + assertEquals("Path 1 should have FLAG2 and content bit", FLAG2 | Cursor.MAY_HAVE_CONTENT_BIT, c.encodedPosition() & Cursor.FLAGS_MASK); + + // Path "2" (both) + c.advance(); + assertEquals("Path 2 should have unioned flags and content bit", FLAG1 | FLAG2 | Cursor.MAY_HAVE_CONTENT_BIT, c.encodedPosition() & Cursor.FLAGS_MASK); + } + + @Test + public void testIntersectionPreservesFlags() + { + + Object[] spec = new Object[] { "v1", "v2" }; // paths "0", "1" + Trie t = dir -> new TrieUtil.CursorFromSpec<>(spec, dir, FLAG1); + TrieSet set = TrieSet.branch(TrieUtil.VERSION, TrieUtil.directComparable("0")); + + Trie intersected = t.intersect(set); + Cursor c = intersected.cursor(Direction.FORWARD); + + // Root + assertEquals("Intersected root should have source flags", FLAG1, c.encodedPosition() & Cursor.FLAGS_MASK); + + // Path "0" + c.advance(); + assertEquals("Intersected path 0 should have source flags and content bit", FLAG1 | Cursor.MAY_HAVE_CONTENT_BIT, c.encodedPosition() & Cursor.FLAGS_MASK); + } + + @Test + public void testMappingMergeCursorUnionsFlags() + { + + long pos = Cursor.rootPosition(Direction.FORWARD); + Cursor c1 = new MockCursor<>(pos | FLAG1); + Cursor c2 = new MockCursor<>(pos | FLAG2); + + MappingMergeCursor.Plain merge = new MappingMergeCursor.Plain<>((x, y) -> x, c1, c2); + + long expected = pos | FLAG1 | FLAG2; + assertEquals("MappingMergeCursor should union flags from both sources", expected, merge.encodedPosition()); + } + + @Test + public void testRangeIntersectionCursorPreservesFlags() + { + + long pos = Cursor.rootPosition(Direction.FORWARD); + RangeCursor source = new MockTrieSetCursor(pos | FLAG1, TrieSetCursor.RangeState.CONTAINED); + TrieSetCursor set = new MockTrieSetCursor(pos | FLAG2, TrieSetCursor.RangeState.CONTAINED); + + RangeIntersectionCursor intersect = new RangeIntersectionCursor<>(source, set); + + // RangeIntersectionCursor favors 'src' flags when matching + assertEquals("RangeIntersectionCursor should preserve source flags when matching", pos | FLAG1, intersect.encodedPosition()); + } + + @Test + public void testRangesCursorContentBit() + { + + // Range [abc, def] + ByteComparable abc = TrieUtil.directComparable("abc"); + ByteComparable def = TrieUtil.directComparable("def"); + RangesCursor cursor = RangesCursor.create(Direction.FORWARD, TrieUtil.VERSION, true, true, abc, def); + + // Root - NOT_CONTAINED state, no content bit + assertEquals(TrieSetCursor.RangeState.NOT_CONTAINED, cursor.state()); + assertTrue("Root should NOT have content bit (NOT_CONTAINED)", (cursor.encodedPosition() & Cursor.MAY_HAVE_CONTENT_BIT) == 0); + + // "a" (prefix of abc) - still NOT_CONTAINED + cursor.advance(); + assertEquals(TrieSetCursor.RangeState.NOT_CONTAINED, cursor.state()); + assertTrue("'a' should NOT have content bit (NOT_CONTAINED)", (cursor.encodedPosition() & Cursor.MAY_HAVE_CONTENT_BIT) == 0); + + // "ab" - still NOT_CONTAINED + cursor.advance(); + assertEquals(TrieSetCursor.RangeState.NOT_CONTAINED, cursor.state()); + assertTrue("'ab' should NOT have content bit (NOT_CONTAINED)", (cursor.encodedPosition() & Cursor.MAY_HAVE_CONTENT_BIT) == 0); + + // "abc" (boundary) - START state, has content + cursor.advance(); + assertEquals(TrieSetCursor.RangeState.START, cursor.state()); + assertTrue("'abc' should have content bit (START boundary)", (cursor.encodedPosition() & Cursor.MAY_HAVE_CONTENT_BIT) != 0); + + // "d" (prefix of def, internal to range [abc, def]) - CONTAINED, no content + cursor.advance(); + assertEquals(TrieSetCursor.RangeState.CONTAINED, cursor.state()); + assertTrue("'d' should NOT have content bit (CONTAINED)", (cursor.encodedPosition() & Cursor.MAY_HAVE_CONTENT_BIT) == 0); + } + + @Test + public void testFlagUnionUtility() { + + long p1 = 0x1234567800000000L | 0x1L; // Depth etc, plus bit 0 + long p2 = 0x1234567800000000L | 0x2L; // Same pos, plus bit 1 + + long unioned = Cursor.unionFlags(p1, p2, Cursor.FLAGS_MASK); + assertEquals("Flags should be unioned", 0x1234567800000003L, unioned); + + long masked = Cursor.unionFlags(p1, p2, 0x1L); + assertEquals("Only bit 0 should be unioned if mask is 0x1", 0x1234567800000001L, masked); + } + + @Test + public void testFlagIntersectionUtility() + { + long p1 = 0x1234567800000000L | FLAG1 | FLAG2; // Has FLAG1 and FLAG2 + long p2 = 0x1234567800000000L | FLAG2 | FLAG3; // Has FLAG2 and FLAG3 + + long intersected = Cursor.intersectionFlags(p1, p2, Cursor.FLAGS_MASK); + // Should keep only FLAG2 (common to both) + assertEquals("Flags should be intersected", 0x1234567800000000L | FLAG2, intersected); + + long masked = Cursor.intersectionFlags(p1, p2, FLAG1 | FLAG2); + // Should only intersect FLAG1 and FLAG2, FLAG3 ignored + assertEquals("Only specified flags should be intersected", 0x1234567800000000L | FLAG2, masked); + } + + @Test + public void testDepthAdjustedCursorPreservesFlags() + { + + long pos = Cursor.encode(1, 0x41, Direction.FORWARD) | FLAG1; + Cursor source = new MockCursor<>(pos); + long attachmentPoint = Cursor.encode(5, 0x42, Direction.FORWARD) | FLAG2; + + DepthAdjustedCursor> adjusted = new DepthAdjustedCursor.Plain<>(source, attachmentPoint); + + // Depth 1 -> should add depth correction. flags from source should be preserved. + long expected = pos + Cursor.depthCorrectionValue(attachmentPoint); + assertEquals("DepthAdjustedCursor should preserve source flags at depth > 0", expected, adjusted.encodedPosition()); + + // Root (depth 0) + source = new MockCursor<>(Cursor.rootPosition(Direction.FORWARD) | FLAG3); + adjusted = new DepthAdjustedCursor.Plain<>(source, attachmentPoint); + // Should union with attachmentPoint flags + long expectedRoot = Cursor.unionFlags(attachmentPoint, source.encodedPosition(), Cursor.ON_RETURN_PATH_BIT | Cursor.FLAGS_MASK); + assertEquals("DepthAdjustedCursor should union flags at root", expectedRoot, adjusted.encodedPosition()); + } + + @Test + public void testPrefixedCursorPreservesFlags() + { + + long pos = Cursor.rootPosition(Direction.FORWARD) | FLAG1; + Cursor source = new MockCursor<>(pos); + ByteComparable prefix = TrieUtil.directComparable("abc"); + + PrefixedCursor.Plain prefixed = new PrefixedCursor.Plain<>(prefix, source); + + // Advance to source root + prefixed.advance(); // "a" + prefixed.advance(); // "b" + prefixed.advance(); // "c" + + // Now it should be at source root. PrefixedCursor uses DepthAdjustedCursor internally when prefix is done. + assertEquals("PrefixedCursor should preserve FLAG1 from source", FLAG1, prefixed.encodedPosition() & FLAG1); + } + + @Test + public void testRangeApplyCursorPreservesFlags() + { + + long pos = Cursor.rootPosition(Direction.FORWARD) | FLAG1; + Cursor data = new MockCursor<>(pos); + RangeCursor range = new MockTrieSetCursor(Cursor.rootPosition(Direction.FORWARD) | FLAG2, TrieSetCursor.RangeState.CONTAINED); + + RangeApplyCursor applied = new RangeApplyCursor<>((s, v) -> v, range, data); + + // RangeApplyCursor delegates encodedPosition to data + assertEquals("RangeApplyCursor should preserve data flags", pos, applied.encodedPosition()); + } + + @Test + public void testDeepMergeUnionsFlags() + { + + long pos = Cursor.rootPosition(Direction.FORWARD); + Cursor c1 = new MockCursor<>(pos | FLAG1); + Cursor c2 = new MockCursor<>(pos | FLAG2); + Cursor c3 = new MockCursor<>(pos | FLAG3); + + MergeCursor> m1 = new MergeCursor.Plain<>(Trie.throwingResolver(), c1, c2); + MergeCursor> m2 = new MergeCursor.Plain<>(Trie.throwingResolver(), m1, c3); + + long expected = pos | FLAG1 | FLAG2 | FLAG3; + assertEquals("Deep MergeCursor should union flags from all ancestors", expected, m2.encodedPosition()); + + } + + private static class MockCursor implements Cursor + { + long position; + + MockCursor(long position) { this.position = position; } + + @Override public long encodedPosition() { return position; } + @Override public T content() { return null; } + @Override public ByteComparable.Version byteComparableVersion() { return TrieUtil.VERSION; } + @Override public long advance() { return position; } + @Override public long skipTo(long p) { return position; } + @Override public Cursor tailCursor(Direction d) { return this; } + } + + private static class MockTrieSetCursor extends MockCursor implements TrieSetCursor + { + RangeState state; + + MockTrieSetCursor(long position, RangeState state) + { + super(position); + this.state = state; + } + + @Override public RangeState state() { return state; } + @Override public TrieSetCursor tailCursor(Direction d) { return this; } + } +} From 13c9f559087d4c2d7229cb23b113733f328c757e Mon Sep 17 00:00:00 2001 From: lesnik2u Date: Wed, 18 Feb 2026 23:22:46 +0100 Subject: [PATCH 3/9] CNDB-15669: fix small nit --- .../org/apache/cassandra/db/tries/DepthAdjustedCursor.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/java/org/apache/cassandra/db/tries/DepthAdjustedCursor.java b/src/java/org/apache/cassandra/db/tries/DepthAdjustedCursor.java index 505edce0aadb..5dc0250c2694 100644 --- a/src/java/org/apache/cassandra/db/tries/DepthAdjustedCursor.java +++ b/src/java/org/apache/cassandra/db/tries/DepthAdjustedCursor.java @@ -24,8 +24,8 @@ class DepthAdjustedCursor> implements Cursor { final C source; - private long depthAdjustment; - private long matchingPositionAtRoot; + protected long depthAdjustment; + protected long matchingPositionAtRoot; DepthAdjustedCursor(C source, long matchingPositionAtRoot) { From 5e3fdb74b23040dda67fa319f1ae60ab1f3074b6 Mon Sep 17 00:00:00 2001 From: lesnik2u Date: Wed, 18 Feb 2026 23:41:22 +0100 Subject: [PATCH 4/9] CNDB-15669: fix --- .../apache/cassandra/db/tries/DepthAdjustedCursor.java | 5 +++-- .../org/apache/cassandra/db/tries/PrefixedCursor.java | 9 +++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/java/org/apache/cassandra/db/tries/DepthAdjustedCursor.java b/src/java/org/apache/cassandra/db/tries/DepthAdjustedCursor.java index 5dc0250c2694..2c55c9d65841 100644 --- a/src/java/org/apache/cassandra/db/tries/DepthAdjustedCursor.java +++ b/src/java/org/apache/cassandra/db/tries/DepthAdjustedCursor.java @@ -46,7 +46,8 @@ long toAdjustedDepth(long position) else if (Cursor.isExhausted(position)) return position; else - return matchingPositionAtRoot | (position & Cursor.ON_RETURN_PATH_BIT); + // Combine structural bits of matchingPositionAtRoot with return path bit and flags from position. + return Cursor.unionFlags(matchingPositionAtRoot, position, Cursor.ON_RETURN_PATH_BIT | Cursor.FLAGS_MASK); } long fromAdjustedDepth(long position) @@ -57,7 +58,7 @@ long fromAdjustedDepth(long position) return adjusted; // The only non-exhausted position that can be requested with this depth is the return path stop for the root. - if (position == (matchingPositionAtRoot | Cursor.ON_RETURN_PATH_BIT)) + if (Cursor.compare(position, matchingPositionAtRoot | Cursor.ON_RETURN_PATH_BIT) == 0) return Cursor.rootReturnPosition(adjusted); else return Cursor.exhaustedPosition(adjusted); diff --git a/src/java/org/apache/cassandra/db/tries/PrefixedCursor.java b/src/java/org/apache/cassandra/db/tries/PrefixedCursor.java index 3f5fb1bea67b..6a43199e8fc5 100644 --- a/src/java/org/apache/cassandra/db/tries/PrefixedCursor.java +++ b/src/java/org/apache/cassandra/db/tries/PrefixedCursor.java @@ -266,6 +266,15 @@ static class DeletionAwareSeparately> this.deletionBranch = copyFrom.deletionBranch; // no need to take tailCursor as we do that when we return it } + @Override + public long encodedPosition() + { + long pos = super.encodedPosition(); + if ((Cursor.isRootPosition(pos) || Cursor.compare(pos, matchingPositionAtRoot) == 0) && deletionBranch != null) + return Cursor.unionFlags(pos, MAY_HAVE_CONTENT_BIT, MAY_HAVE_CONTENT_BIT); + return pos; + } + @Override public RangeCursor deletionBranchCursor(Direction direction) { From fc08e1493ed9fafb2de232c0e0f434f9d69e4c65 Mon Sep 17 00:00:00 2001 From: lesnik2u Date: Thu, 19 Feb 2026 18:26:34 +0100 Subject: [PATCH 5/9] CNDB-15669: Address review feedback --- .../db/tries/CollectionMergeCursor.java | 86 ++++++++++++------- .../org/apache/cassandra/db/tries/Cursor.java | 14 ++- .../db/tries/DepthAdjustedCursor.java | 5 +- .../db/tries/FlexibleMergeCursor.java | 4 +- .../cassandra/db/tries/InMemoryRangeTrie.java | 6 +- .../cassandra/db/tries/InMemoryReadTrie.java | 11 ++- .../db/tries/IntersectionCursor.java | 24 +++++- .../cassandra/db/tries/MergeCursor.java | 4 +- .../cassandra/db/tries/PrefixedCursor.java | 2 + .../cassandra/db/tries/TrieSetCursor.java | 32 ++++--- .../cassandra/db/tries/MetadataFlagsTest.java | 52 +++++++++-- 11 files changed, 173 insertions(+), 67 deletions(-) diff --git a/src/java/org/apache/cassandra/db/tries/CollectionMergeCursor.java b/src/java/org/apache/cassandra/db/tries/CollectionMergeCursor.java index 6c2b88a5353d..201e659a5c28 100644 --- a/src/java/org/apache/cassandra/db/tries/CollectionMergeCursor.java +++ b/src/java/org/apache/cassandra/db/tries/CollectionMergeCursor.java @@ -96,7 +96,6 @@ abstract class CollectionMergeCursor> implements Cursor CollectionMergeCursor(Trie.CollectionMergeResolver resolver, Direction direction, Collection inputs, IntFunction cursorArrayConstructor, BiFunction extractor) { @@ -117,8 +116,8 @@ CollectionMergeCursor(Trie.CollectionMergeResolver resolver, Direction di ++i; } - // The cursors are all currently positioned on the root and thus in valid heap order. - assert Arrays.stream(heap).allMatch(x -> equalCursor(head, x)); + // Initialize currentPosition since encodedPosition() now returns it directly + collectAndCachePosition(); } /// Interface for internal operations that can be applied to selected top elements of the heap. @@ -126,7 +125,7 @@ interface HeapOp> { void apply(CollectionMergeCursor self, C cursor, int index); - default boolean shouldContinueWithChild(C child, C head) + default boolean shouldContinueWithChild(CollectionMergeCursor self, C child, C head) { return equalCursor(child, head); } @@ -203,7 +202,7 @@ private void applyToSelectedElementsInHeap(HeapOp action, int index) if (index >= heap.length) return; C item = heap[index]; - if (!action.shouldContinueWithChild(item, head)) + if (!action.shouldContinueWithChild(this, item, head)) return; // If the children are at the same position, they also need advancing and their subheap @@ -216,6 +215,57 @@ private void applyToSelectedElementsInHeap(HeapOp action, int index) action.apply(this, item, index); } + /// Collects and caches the current position by unioning flags from all cursors at the same position. + /// This is called after advancing to ensure the position is always up-to-date. + private long collectAndCachePosition() + { + long pos = head.encodedPosition(); + if (Cursor.isExhausted(pos) || !branchHasMultipleSources()) + { + currentPosition = pos; + return currentPosition; + } + + // Returns head's position with flags unioned from all cursors at the same position, + // since multiple sources may each contribute flags (e.g. MAY_HAVE_CONTENT_BIT) that + // the head alone would not reflect. + + // Optimization: if the head already has all flags set, no need to walk the heap + if ((pos & Cursor.FLAGS_MASK) == Cursor.FLAGS_MASK) + { + currentPosition = pos; + return currentPosition; + } + + currentPosition = pos; + + // Walk the heap to collect flags from all equal cursors, stopping early if all flags are collected + applyToSelectedElementsInHeap(FLAG_COLLECTOR, 0); + + // Position bits must match for all selected cursors, so we don't need unionFlags + return currentPosition; + } + + /// HeapOp to collect flags from heap cursors, with early termination when all flags are collected + private static class FlagCollector> implements HeapOp + { + @Override + public void apply(CollectionMergeCursor self, C cursor, int index) + { + self.currentPosition |= cursor.encodedPosition(); + } + + @Override + public boolean shouldContinueWithChild(CollectionMergeCursor self, C child, C head) + { + // Continue only if cursors are equal AND we haven't collected all flags yet + return equalCursor(child, head) && (self.currentPosition & Cursor.FLAGS_MASK) != Cursor.FLAGS_MASK; + } + } + + @SuppressWarnings("rawtypes") + private static final HeapOp FLAG_COLLECTOR = new FlagCollector(); + /// Push the given state down in the heap from the given index until it finds its proper place among /// the subheap rooted at that position. private void heapifyDown(C item, int index) @@ -249,7 +299,6 @@ private long maybeSwapHead(long headPosition) if (cmp < 0) { currentPosition = headPosition; - positionCollected = true; return headPosition; // head is still smallest } @@ -260,7 +309,7 @@ private long maybeSwapHead(long headPosition) head = heap[0]; heapifyDown(newHeap0, 0); } - return encodedPosition(); + return collectAndCachePosition(); } boolean branchHasMultipleSources() @@ -277,7 +326,6 @@ boolean isExhausted() public long advance() { contentCollected = false; - positionCollected = false; return doAdvance(); } @@ -291,7 +339,6 @@ private long doAdvance() public long advanceMultiple(TransitionsReceiver receiver) { contentCollected = false; - positionCollected = false; // If the current position is present in just one cursor, we can safely descend multiple levels within // its branch as no one of the other tries has content for it. if (branchHasMultipleSources()) @@ -311,7 +358,7 @@ public long skipTo(long encodedSkipPosition) class SkipTo implements AdvancingHeapOp { @Override - public boolean shouldContinueWithChild(C child, C head) + public boolean shouldContinueWithChild(CollectionMergeCursor self, C child, C head) { // When the requested position descends, the implicit prefix bytes are those of the head cursor, // and thus we need to check against that if it is a match. @@ -331,7 +378,6 @@ public void apply(C cursor) } contentCollected = false; - positionCollected = false; applyToSelectedElementsInHeap(new SkipTo(), 0); return maybeSwapHead(head.skipTo(encodedSkipPosition)); } @@ -339,24 +385,6 @@ public void apply(C cursor) @Override public long encodedPosition() { - if (!positionCollected) - { - long pos = head.encodedPosition(); - if (Cursor.isExhausted(pos) || !branchHasMultipleSources()) - currentPosition = pos; - else - { - // Returns head's position with flags unioned from all cursors at the same position, - // since multiple sources may each contribute flags (e.g. MAY_HAVE_CONTENT_BIT) that - // the head alone would not reflect. - currentPosition = pos; - applyToSelectedElementsInHeap((self, cursor, index) -> { - currentPosition |= cursor.encodedPosition(); - }, 0); - currentPosition = Cursor.unionFlags(pos, currentPosition, Cursor.FLAGS_MASK); - } - positionCollected = true; - } return currentPosition; } diff --git a/src/java/org/apache/cassandra/db/tries/Cursor.java b/src/java/org/apache/cassandra/db/tries/Cursor.java index a1ed19d5de40..24c3b800315d 100644 --- a/src/java/org/apache/cassandra/db/tries/Cursor.java +++ b/src/java/org/apache/cassandra/db/tries/Cursor.java @@ -143,6 +143,7 @@ interface Cursor /// Flag indicating whether this position may have content. long MAY_HAVE_CONTENT_BIT = 1L; + /// Mask of the transition bits including the direction. We apply xor with this value to form a position in the /// reverse direction. long TRANSITION_MASK = 0x8FFL << TRANSITION_SHIFT; @@ -209,11 +210,22 @@ static long compare(long encoded1, long encoded2) /// Returns a new position with flags from pos2 combined using union operation. /// Union: (pos1 | pos2) & flags | (pos1 & ~flags) + /// This preserves the structural bits (depth, transition) from pos1 and combines the specified flags from both positions. static long unionFlags(long pos1, long pos2, long flags) { return (pos1 | pos2) & flags | (pos1 & ~flags); } + /// Returns a new position with flags from pos2 combined using union operation. + /// This version is optimized for cases where the position bits (depth and incoming transition) are known to match. + /// The assertion validates this invariant in debug builds. + static long unionFlagsMatchingPositions(long pos1, long pos2) + { + assert compare(pos1, pos2) == 0 : + String.format("Position mismatch in unionFlagsMatchingPositions: compare(%016x, %016x) = %d", pos1, pos2, compare(pos1, pos2)); + return pos1 | pos2; + } + /// Returns a new position with flags from pos2 combined using intersection operation. /// Intersection: pos1 & pos2 & flags | (pos1 & ~flags) static long intersectionFlags(long pos1, long pos2, long flags) @@ -292,7 +304,7 @@ static String toString(long encodedPosition) depth(encodedPosition), incomingTransition(encodedPosition), isOnReturnPath(encodedPosition) ? "↑" : " ", - (encodedPosition & MAY_HAVE_CONTENT_BIT) != 0 ? "*" : " ", + (encodedPosition & MAY_HAVE_CONTENT_BIT) != 0 ? "C" : " ", direction(encodedPosition)); } diff --git a/src/java/org/apache/cassandra/db/tries/DepthAdjustedCursor.java b/src/java/org/apache/cassandra/db/tries/DepthAdjustedCursor.java index 2c55c9d65841..ef5dc230ba0a 100644 --- a/src/java/org/apache/cassandra/db/tries/DepthAdjustedCursor.java +++ b/src/java/org/apache/cassandra/db/tries/DepthAdjustedCursor.java @@ -46,8 +46,9 @@ long toAdjustedDepth(long position) else if (Cursor.isExhausted(position)) return position; else - // Combine structural bits of matchingPositionAtRoot with return path bit and flags from position. - return Cursor.unionFlags(matchingPositionAtRoot, position, Cursor.ON_RETURN_PATH_BIT | Cursor.FLAGS_MASK); + // Combine structural bits of matchingPositionAtRoot (without its flags) with return path bit and flags from position. + // Clear any flags from matchingPositionAtRoot to ensure only source position flags are preserved. + return Cursor.unionFlags(matchingPositionAtRoot & ~Cursor.FLAGS_MASK, position, Cursor.ON_RETURN_PATH_BIT | Cursor.FLAGS_MASK); } long fromAdjustedDepth(long position) diff --git a/src/java/org/apache/cassandra/db/tries/FlexibleMergeCursor.java b/src/java/org/apache/cassandra/db/tries/FlexibleMergeCursor.java index cb58f5445b09..596b8e59570e 100644 --- a/src/java/org/apache/cassandra/db/tries/FlexibleMergeCursor.java +++ b/src/java/org/apache/cassandra/db/tries/FlexibleMergeCursor.java @@ -61,7 +61,7 @@ enum State state = c2 != null ? State.AT_BOTH : State.C1_ONLY; currentPosition = c1.encodedPosition(); if (c2 != null) - currentPosition = Cursor.unionFlags(currentPosition, c2.encodedPosition(), Cursor.FLAGS_MASK); + currentPosition = Cursor.unionFlagsMatchingPositions(currentPosition, c2.encodedPosition()); // We can't call postAdvance here because the class may not be completely initialized. // The concrete class should do that instead } @@ -178,7 +178,7 @@ private long checkOrder(long c1pos, long c2posUncorrected) } // c1pos == c2pos state = State.AT_BOTH; - currentPosition = Cursor.unionFlags(c1pos, c2pos, Cursor.FLAGS_MASK); + currentPosition = Cursor.unionFlagsMatchingPositions(c1pos, c2pos); return postAdvance(currentPosition); } diff --git a/src/java/org/apache/cassandra/db/tries/InMemoryRangeTrie.java b/src/java/org/apache/cassandra/db/tries/InMemoryRangeTrie.java index 5cf87e8aa38e..3365a538c622 100644 --- a/src/java/org/apache/cassandra/db/tries/InMemoryRangeTrie.java +++ b/src/java/org/apache/cassandra/db/tries/InMemoryRangeTrie.java @@ -137,10 +137,10 @@ long updateActiveAndReturn(long position) { if (!Cursor.isExhausted(position)) { - currentPosition = position | MAY_HAVE_CONTENT_BIT; + currentPosition = position; // Always check if we are seeing new content; if we do, that's an easy state update. - S content = content(); + S content = (position & MAY_HAVE_CONTENT_BIT) != 0 ? content() : null; if (content != null) { activeRange = content; @@ -301,7 +301,7 @@ long advanceToNextChildWithTarget(int node, int data, int transition) long presentAscentPathContent() { - return setNodeState(Cursor.unionFlags(Cursor.encode(++depth, 0, direction), MAY_HAVE_CONTENT_BIT, MAY_HAVE_CONTENT_BIT) | ON_RETURN_PATH_BIT, + return setNodeState(Cursor.encode(++depth, 0, direction) | MAY_HAVE_CONTENT_BIT | ON_RETURN_PATH_BIT, rootAscentContent, NONE, NONE); diff --git a/src/java/org/apache/cassandra/db/tries/InMemoryReadTrie.java b/src/java/org/apache/cassandra/db/tries/InMemoryReadTrie.java index 7b7bde445bd8..ac228c854b65 100644 --- a/src/java/org/apache/cassandra/db/tries/InMemoryReadTrie.java +++ b/src/java/org/apache/cassandra/db/tries/InMemoryReadTrie.java @@ -1237,16 +1237,22 @@ void setCurrentNodeAndApplyPrefixes(int node, int depth, int transition, boolean // There's no reason to delay going to the position of the content. currentPosition |= ON_RETURN_PATH_BIT; content = trie.getContent(node); + currentPosition |= MAY_HAVE_CONTENT_BIT; } } else + { content = trie.getContent(node); + currentPosition |= MAY_HAVE_CONTENT_BIT; + } currentNode = NONE; } else if (offset(node) == PREFIX_OFFSET) { content = processPrefix(node, depth, transition); + if (content != null) + currentPosition |= MAY_HAVE_CONTENT_BIT; currentNode = trie.getChildOfPrefixNode(node); } else @@ -1254,9 +1260,6 @@ else if (offset(node) == PREFIX_OFFSET) content = null; currentNode = node; } - - if (content != null || isLeaf(node)) - currentPosition |= MAY_HAVE_CONTENT_BIT; } /// Get the content from a prefix node and/or put a backtracking entry for return path data. @@ -1312,7 +1315,7 @@ long descendIntoChain(int child, int transition) long setNodeState(long nextPosition, T nodeContent, int fullNode, int node) { currentPosition = nextPosition; - if (nodeContent != null || isLeaf(fullNode)) + if (nodeContent != null) currentPosition |= MAY_HAVE_CONTENT_BIT; content = nodeContent; currentFullNode = fullNode; diff --git a/src/java/org/apache/cassandra/db/tries/IntersectionCursor.java b/src/java/org/apache/cassandra/db/tries/IntersectionCursor.java index e178bf410da1..82f30b2471b3 100644 --- a/src/java/org/apache/cassandra/db/tries/IntersectionCursor.java +++ b/src/java/org/apache/cassandra/db/tries/IntersectionCursor.java @@ -202,10 +202,32 @@ public T content() } } + @Override + public long advance() + { + return adjustPosition(super.advance()); + } + + @Override + public long advanceMultiple(TransitionsReceiver receiver) + { + return adjustPosition(super.advanceMultiple(receiver)); + } + + @Override + public long skipTo(long encodedSkipPosition) + { + return adjustPosition(super.skipTo(encodedSkipPosition)); + } + @Override public long encodedPosition() { - long pos = super.encodedPosition(); + return adjustPosition(super.encodedPosition()); + } + + private long adjustPosition(long pos) + { if (state == State.MATCHING && !set.state().applicableAfter) pos &= ~Cursor.MAY_HAVE_CONTENT_BIT; return pos; diff --git a/src/java/org/apache/cassandra/db/tries/MergeCursor.java b/src/java/org/apache/cassandra/db/tries/MergeCursor.java index 9300b7f5a89c..9d481506e23d 100644 --- a/src/java/org/apache/cassandra/db/tries/MergeCursor.java +++ b/src/java/org/apache/cassandra/db/tries/MergeCursor.java @@ -46,7 +46,7 @@ abstract class MergeCursor, U, D extends Cursor, R> im this.c1 = c1; this.c2 = c2; atC1 = atC2 = true; - currentPosition = Cursor.unionFlags(c1.encodedPosition(), c2.encodedPosition(), Cursor.FLAGS_MASK); + currentPosition = Cursor.unionFlagsMatchingPositions(c1.encodedPosition(), c2.encodedPosition()); } @Override @@ -86,7 +86,7 @@ long checkOrder(long c1pos, long c2pos) atC1 = cmp <= 0; atC2 = cmp >= 0; if (atC1 && atC2) - return currentPosition = Cursor.unionFlags(c1pos, c2pos, Cursor.FLAGS_MASK); + return currentPosition = Cursor.unionFlagsMatchingPositions(c1pos, c2pos); else return currentPosition = atC1 ? c1pos : c2pos; } diff --git a/src/java/org/apache/cassandra/db/tries/PrefixedCursor.java b/src/java/org/apache/cassandra/db/tries/PrefixedCursor.java index 6a43199e8fc5..5d9ca6606a69 100644 --- a/src/java/org/apache/cassandra/db/tries/PrefixedCursor.java +++ b/src/java/org/apache/cassandra/db/tries/PrefixedCursor.java @@ -118,6 +118,8 @@ private long setPositionAndCheckPrefixDone(long position) if (nextPrefixByte == ByteSource.END_OF_STREAM) { setAttachmentPoint(position); + // Replace position with source's adjusted position to inherit its flags (e.g. MAY_HAVE_CONTENT_BIT). + // The prefix itself has no content; only the source does. position = toAdjustedDepth(source.encodedPosition()); } diff --git a/src/java/org/apache/cassandra/db/tries/TrieSetCursor.java b/src/java/org/apache/cassandra/db/tries/TrieSetCursor.java index 50366d6050e9..e2a9b4862e04 100644 --- a/src/java/org/apache/cassandra/db/tries/TrieSetCursor.java +++ b/src/java/org/apache/cassandra/db/tries/TrieSetCursor.java @@ -217,7 +217,6 @@ default TrieSetCursor negated() class Negated implements TrieSetCursor { final TrieSetCursor source; - final Direction direction; enum Overriding { @@ -228,14 +227,13 @@ enum Overriding Negated(TrieSetCursor source) { this.source = source; - this.direction = source.direction(); overriding = Overriding.ROOT; } @Override public Direction direction() { - return direction; + return source.direction(); } @Override @@ -249,15 +247,15 @@ public long encodedPosition() case EXHAUSTED: return Cursor.exhaustedPosition(encodedPosition); case ROOT: + // In ROOT case, set flag if negated state is a boundary + if (!Cursor.isExhausted(encodedPosition) && state().isBoundary()) + encodedPosition |= MAY_HAVE_CONTENT_BIT; + else + encodedPosition &= ~MAY_HAVE_CONTENT_BIT; + return encodedPosition; case NONE: default: - if (!Cursor.isExhausted(encodedPosition)) - { - if (state().isBoundary()) - encodedPosition |= MAY_HAVE_CONTENT_BIT; - else - encodedPosition &= ~MAY_HAVE_CONTENT_BIT; - } + // In NONE case, negation doesn't change boundary status (boundary stays boundary) return encodedPosition; } } @@ -271,15 +269,16 @@ public ByteComparable.Version byteComparableVersion() @Override public RangeState state() { + Direction dir = direction(); switch (overriding) { case ROOT: - if (!source.state().succeedingIncluded(direction())) - return direction().select(RangeState.START, RangeState.END); + if (!source.state().succeedingIncluded(dir)) + return dir.select(RangeState.START, RangeState.END); else return RangeState.NOT_CONTAINED; case ROOT_RETURN: - return direction().select(RangeState.END, RangeState.START); + return dir.select(RangeState.END, RangeState.START); case EXHAUSTED: return RangeState.NOT_CONTAINED; case NONE: @@ -321,12 +320,11 @@ public long advance() { case ROOT_RETURN: overriding = Overriding.EXHAUSTED; - break; + return encodedPosition(); default: - checkOverride(source.advance()); - break; + // checkOverride already calls encodedPosition() when needed + return checkOverride(source.advance()); } - return encodedPosition(); } @Override diff --git a/test/unit/org/apache/cassandra/db/tries/MetadataFlagsTest.java b/test/unit/org/apache/cassandra/db/tries/MetadataFlagsTest.java index de74a09e6dae..6a355103ecdd 100644 --- a/test/unit/org/apache/cassandra/db/tries/MetadataFlagsTest.java +++ b/test/unit/org/apache/cassandra/db/tries/MetadataFlagsTest.java @@ -63,7 +63,7 @@ public void testMergeCursorUnionsFlags() Cursor c1 = new MockCursor<>(pos | FLAG1); Cursor c2 = new MockCursor<>(pos | FLAG2); - MergeCursor> merge = new MergeCursor.Plain<>(Trie.throwingResolver(), c1, c2); + MergeCursor.Plain merge = new MergeCursor.Plain<>(Trie.throwingResolver(), c1, c2); long expected = pos | FLAG1 | FLAG2; assertEquals("MergeCursor should union flags from both sources", expected, merge.encodedPosition()); @@ -314,9 +314,49 @@ public void testDepthAdjustedCursorPreservesFlags() // Root (depth 0) source = new MockCursor<>(Cursor.rootPosition(Direction.FORWARD) | FLAG3); adjusted = new DepthAdjustedCursor.Plain<>(source, attachmentPoint); - // Should union with attachmentPoint flags - long expectedRoot = Cursor.unionFlags(attachmentPoint, source.encodedPosition(), Cursor.ON_RETURN_PATH_BIT | Cursor.FLAGS_MASK); - assertEquals("DepthAdjustedCursor should union flags at root", expectedRoot, adjusted.encodedPosition()); + // Should NOT union with attachmentPoint flags, only preserve source flags + long expectedRoot = (attachmentPoint & ~Cursor.FLAGS_MASK) | (source.encodedPosition() & (Cursor.ON_RETURN_PATH_BIT | Cursor.FLAGS_MASK)); + assertEquals("DepthAdjustedCursor should preserve source flags at root", expectedRoot, adjusted.encodedPosition()); + } + + @Test + public void testUnionFlagsMatchingPositions() + { + long pos = Cursor.encode(5, 0x41, Direction.FORWARD); + long p1 = pos | FLAG1; + long p2 = pos | FLAG2; + + long unioned = Cursor.unionFlagsMatchingPositions(p1, p2); + assertEquals("Flags should be unioned", pos | FLAG1 | FLAG2, unioned); + } + + @Test(expected = AssertionError.class) + public void testUnionFlagsMatchingPositionsAssertion() + { + long p1 = Cursor.encode(5, 0x41, Direction.FORWARD); + long p2 = Cursor.encode(5, 0x42, Direction.FORWARD); + + // This should trigger the assertion because positions don't match + Cursor.unionFlagsMatchingPositions(p1, p2); + } + + @Test + public void testCollectionMergeCursorPositionOptimization() + { + // Test that CollectionMergeCursor correctly unions flags from multiple sources at the same position + long pos = Cursor.rootPosition(Direction.FORWARD); + List> inputs = Arrays.asList( + new MockCursor<>(pos | FLAG1), + new MockCursor<>(pos | FLAG2) + ); + + CollectionMergeCursor> merge = new CollectionMergeCursor.Plain<>( + Trie.throwingResolver(), Direction.FORWARD, inputs, Cursor::tailCursor + ); + + // CollectionMergeCursor.collectAndCachePosition() should have unioned the flags + assertEquals("CollectionMergeCursor should union flags from all sources at start", + pos | FLAG1 | FLAG2, merge.encodedPosition()); } @Test @@ -361,8 +401,8 @@ public void testDeepMergeUnionsFlags() Cursor c2 = new MockCursor<>(pos | FLAG2); Cursor c3 = new MockCursor<>(pos | FLAG3); - MergeCursor> m1 = new MergeCursor.Plain<>(Trie.throwingResolver(), c1, c2); - MergeCursor> m2 = new MergeCursor.Plain<>(Trie.throwingResolver(), m1, c3); + MergeCursor.Plain m1 = new MergeCursor.Plain<>(Trie.throwingResolver(), c1, c2); + MergeCursor.Plain m2 = new MergeCursor.Plain<>(Trie.throwingResolver(), m1, c3); long expected = pos | FLAG1 | FLAG2 | FLAG3; assertEquals("Deep MergeCursor should union flags from all ancestors", expected, m2.encodedPosition()); From f0e7739f0f3bd908c7400bdc50c3b0c6d8782910 Mon Sep 17 00:00:00 2001 From: lesnik2u Date: Fri, 13 Mar 2026 17:01:23 +0100 Subject: [PATCH 6/9] CNDB-15669: adressing review feedback and small optimizations --- .../db/tries/CollectionMergeCursor.java | 17 +++----- .../org/apache/cassandra/db/tries/Cursor.java | 3 +- .../cassandra/db/tries/InMemoryRangeTrie.java | 2 + .../cassandra/db/tries/InMemoryReadTrie.java | 2 - .../cassandra/db/tries/SingletonCursor.java | 8 +++- .../cassandra/db/tries/TrieSetCursor.java | 40 +++++++++++-------- 6 files changed, 40 insertions(+), 32 deletions(-) diff --git a/src/java/org/apache/cassandra/db/tries/CollectionMergeCursor.java b/src/java/org/apache/cassandra/db/tries/CollectionMergeCursor.java index 201e659a5c28..d7d55a960ac4 100644 --- a/src/java/org/apache/cassandra/db/tries/CollectionMergeCursor.java +++ b/src/java/org/apache/cassandra/db/tries/CollectionMergeCursor.java @@ -117,7 +117,7 @@ CollectionMergeCursor(Trie.CollectionMergeResolver resolver, Direction di } // Initialize currentPosition since encodedPosition() now returns it directly - collectAndCachePosition(); + collectAndCachePositionFlags(); } /// Interface for internal operations that can be applied to selected top elements of the heap. @@ -217,7 +217,7 @@ private void applyToSelectedElementsInHeap(HeapOp action, int index) /// Collects and caches the current position by unioning flags from all cursors at the same position. /// This is called after advancing to ensure the position is always up-to-date. - private long collectAndCachePosition() + private long collectAndCachePositionFlags() { long pos = head.encodedPosition(); if (Cursor.isExhausted(pos) || !branchHasMultipleSources()) @@ -226,12 +226,7 @@ private long collectAndCachePosition() return currentPosition; } - // Returns head's position with flags unioned from all cursors at the same position, - // since multiple sources may each contribute flags (e.g. MAY_HAVE_CONTENT_BIT) that - // the head alone would not reflect. - - // Optimization: if the head already has all flags set, no need to walk the heap - if ((pos & Cursor.FLAGS_MASK) == Cursor.FLAGS_MASK) + if ((pos & Cursor.MAY_HAVE_CONTENT_BIT) == Cursor.MAY_HAVE_CONTENT_BIT) { currentPosition = pos; return currentPosition; @@ -258,8 +253,8 @@ public void apply(CollectionMergeCursor self, C cursor, int index) @Override public boolean shouldContinueWithChild(CollectionMergeCursor self, C child, C head) { - // Continue only if cursors are equal AND we haven't collected all flags yet - return equalCursor(child, head) && (self.currentPosition & Cursor.FLAGS_MASK) != Cursor.FLAGS_MASK; + // Continue only if equal AND the content flag is not yet collected. + return equalCursor(child, head) && (self.currentPosition & Cursor.MAY_HAVE_CONTENT_BIT) == 0; } } @@ -309,7 +304,7 @@ private long maybeSwapHead(long headPosition) head = heap[0]; heapifyDown(newHeap0, 0); } - return collectAndCachePosition(); + return collectAndCachePositionFlags(); } boolean branchHasMultipleSources() diff --git a/src/java/org/apache/cassandra/db/tries/Cursor.java b/src/java/org/apache/cassandra/db/tries/Cursor.java index 24c3b800315d..84e164a8f2a8 100644 --- a/src/java/org/apache/cassandra/db/tries/Cursor.java +++ b/src/java/org/apache/cassandra/db/tries/Cursor.java @@ -204,7 +204,8 @@ static boolean isOnReturnPath(long encodedPosition) static long compare(long encoded1, long encoded2) { // This can support depth of 2^31 - 1 without overflowing. - return (encoded1 & ~FLAGS_MASK) - (encoded2 & ~FLAGS_MASK); + // Normalise the flag bits to the same value in both operands before subtracting. + return (encoded1 | FLAGS_MASK) - (encoded2 | FLAGS_MASK); } diff --git a/src/java/org/apache/cassandra/db/tries/InMemoryRangeTrie.java b/src/java/org/apache/cassandra/db/tries/InMemoryRangeTrie.java index 3365a538c622..3e2d13544401 100644 --- a/src/java/org/apache/cassandra/db/tries/InMemoryRangeTrie.java +++ b/src/java/org/apache/cassandra/db/tries/InMemoryRangeTrie.java @@ -277,6 +277,8 @@ static class InMemoryRangeBranchCursor> extends InMemory if (rootAscentContent != null) addBacktrack(NONE, 0, -1); setNodeState(currentPosition, rootDescentContent, currentFullNode, currentNode); + if (rootDescentContent != null) + currentPosition |= MAY_HAVE_CONTENT_BIT; updateActiveAndReturn(currentPosition); } diff --git a/src/java/org/apache/cassandra/db/tries/InMemoryReadTrie.java b/src/java/org/apache/cassandra/db/tries/InMemoryReadTrie.java index ac228c854b65..4d80eea2349c 100644 --- a/src/java/org/apache/cassandra/db/tries/InMemoryReadTrie.java +++ b/src/java/org/apache/cassandra/db/tries/InMemoryReadTrie.java @@ -1315,8 +1315,6 @@ long descendIntoChain(int child, int transition) long setNodeState(long nextPosition, T nodeContent, int fullNode, int node) { currentPosition = nextPosition; - if (nodeContent != null) - currentPosition |= MAY_HAVE_CONTENT_BIT; content = nodeContent; currentFullNode = fullNode; currentNode = node; diff --git a/src/java/org/apache/cassandra/db/tries/SingletonCursor.java b/src/java/org/apache/cassandra/db/tries/SingletonCursor.java index 5be9c8779d10..d61d2a6009ee 100644 --- a/src/java/org/apache/cassandra/db/tries/SingletonCursor.java +++ b/src/java/org/apache/cassandra/db/tries/SingletonCursor.java @@ -83,7 +83,9 @@ public long advance() currentPosition = nextPosition; if (!Cursor.isExhausted(nextPosition)) prepareNextPosition(currentPosition); - return encodedPosition(); + if (atEnd()) + currentPosition |= MAY_HAVE_CONTENT_BIT; + return currentPosition; } @Override @@ -106,7 +108,9 @@ public long advanceMultiple(TransitionsReceiver receiver) } currentPosition = Cursor.positionForDescentWithByte(pos, current); nextPosition = Cursor.exhaustedPosition(currentPosition); - return encodedPosition(); + // atEnd() is unconditionally true here; set the bit directly. + currentPosition |= MAY_HAVE_CONTENT_BIT; + return currentPosition; } @Override diff --git a/src/java/org/apache/cassandra/db/tries/TrieSetCursor.java b/src/java/org/apache/cassandra/db/tries/TrieSetCursor.java index e2a9b4862e04..b02afe2ba73d 100644 --- a/src/java/org/apache/cassandra/db/tries/TrieSetCursor.java +++ b/src/java/org/apache/cassandra/db/tries/TrieSetCursor.java @@ -223,11 +223,13 @@ enum Overriding NONE, ROOT, ROOT_RETURN, EXHAUSTED } Overriding overriding; + long currentPosition; Negated(TrieSetCursor source) { this.source = source; overriding = Overriding.ROOT; + updateCurrentPosition(); } @Override @@ -239,24 +241,29 @@ public Direction direction() @Override public long encodedPosition() { - long encodedPosition = source.encodedPosition(); + return currentPosition; + } + + private long updateCurrentPosition() + { + long pos = source.encodedPosition(); switch (overriding) { case ROOT_RETURN: - return Cursor.rootReturnPosition(encodedPosition) | MAY_HAVE_CONTENT_BIT; + return currentPosition = Cursor.rootReturnPosition(pos) | MAY_HAVE_CONTENT_BIT; case EXHAUSTED: - return Cursor.exhaustedPosition(encodedPosition); + return currentPosition = Cursor.exhaustedPosition(pos); case ROOT: // In ROOT case, set flag if negated state is a boundary - if (!Cursor.isExhausted(encodedPosition) && state().isBoundary()) - encodedPosition |= MAY_HAVE_CONTENT_BIT; + if (!Cursor.isExhausted(pos) && state().isBoundary()) + pos |= MAY_HAVE_CONTENT_BIT; else - encodedPosition &= ~MAY_HAVE_CONTENT_BIT; - return encodedPosition; + pos &= ~MAY_HAVE_CONTENT_BIT; + return currentPosition = pos; case NONE: default: // In NONE case, negation doesn't change boundary status (boundary stays boundary) - return encodedPosition; + return currentPosition = pos; } } @@ -269,16 +276,17 @@ public ByteComparable.Version byteComparableVersion() @Override public RangeState state() { - Direction dir = direction(); switch (overriding) { case ROOT: + Direction dir = direction(); if (!source.state().succeedingIncluded(dir)) return dir.select(RangeState.START, RangeState.END); else return RangeState.NOT_CONTAINED; case ROOT_RETURN: - return dir.select(RangeState.END, RangeState.START); + Direction dirReturn = direction(); + return dirReturn.select(RangeState.END, RangeState.START); case EXHAUSTED: return RangeState.NOT_CONTAINED; case NONE: @@ -293,7 +301,7 @@ long checkOverride(long encodedPosition) if (depth > 0) { overriding = Overriding.NONE; - return encodedPosition; + return updateCurrentPosition(); } else if (depth == 0) { @@ -301,7 +309,7 @@ else if (depth == 0) // we no longer have. Go directly to exhausted. assert Cursor.isOnReturnPath(encodedPosition); overriding = Overriding.EXHAUSTED; - return encodedPosition(); + return updateCurrentPosition(); } else // depth < 0 { @@ -309,7 +317,7 @@ else if (depth == 0) // path to close it. assert Cursor.isExhausted(encodedPosition); overriding = Overriding.ROOT_RETURN; - return encodedPosition(); + return updateCurrentPosition(); } } @@ -320,9 +328,9 @@ public long advance() { case ROOT_RETURN: overriding = Overriding.EXHAUSTED; - return encodedPosition(); + return updateCurrentPosition(); default: - // checkOverride already calls encodedPosition() when needed + // checkOverride already calls updateCurrentPosition() return checkOverride(source.advance()); } } @@ -334,7 +342,7 @@ public long skipTo(long encodedSkipPosition) overriding = Overriding.EXHAUSTED; else checkOverride(source.skipTo(encodedSkipPosition)); - return encodedPosition(); + return updateCurrentPosition(); } // Sets don't implement advanceMultiple as they are only meant to limit data tries. From d546b17c25507a0c6a2d7d0ca403c3de3bf1f653 Mon Sep 17 00:00:00 2001 From: lesnik2u Date: Mon, 16 Mar 2026 10:19:50 +0100 Subject: [PATCH 7/9] CNDB-15669: small nit fix --- .../org/apache/cassandra/db/tries/SingletonCursor.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/java/org/apache/cassandra/db/tries/SingletonCursor.java b/src/java/org/apache/cassandra/db/tries/SingletonCursor.java index d61d2a6009ee..9df9f89c5347 100644 --- a/src/java/org/apache/cassandra/db/tries/SingletonCursor.java +++ b/src/java/org/apache/cassandra/db/tries/SingletonCursor.java @@ -83,9 +83,7 @@ public long advance() currentPosition = nextPosition; if (!Cursor.isExhausted(nextPosition)) prepareNextPosition(currentPosition); - if (atEnd()) - currentPosition |= MAY_HAVE_CONTENT_BIT; - return currentPosition; + return encodedPosition(); } @Override @@ -136,7 +134,9 @@ public T content() @Override public long encodedPosition() { - return atEnd() ? currentPosition | MAY_HAVE_CONTENT_BIT : currentPosition; + return Cursor.isExhausted(nextPosition) && !Cursor.isExhausted(currentPosition) + ? currentPosition | MAY_HAVE_CONTENT_BIT + : currentPosition; } @Override From 79491906d53317e72966f4361f09352e39bbd5d0 Mon Sep 17 00:00:00 2001 From: lesnik2u Date: Mon, 23 Mar 2026 11:37:09 +0100 Subject: [PATCH 8/9] CNDB-15669: Added more content guards and fixed xor login in TrieSetCursor --- .../apache/cassandra/db/tries/InMemoryBaseTrie.java | 7 ++++--- .../db/tries/InMemoryDeletionAwareTrie.java | 6 +++--- .../cassandra/db/tries/InMemoryRangeTrie.java | 6 +++--- .../apache/cassandra/db/tries/InMemoryReadTrie.java | 2 +- .../org/apache/cassandra/db/tries/InMemoryTrie.java | 4 ++-- .../apache/cassandra/db/tries/TrieSetCursor.java | 5 +---- .../cassandra/db/tries/RangesTrieSetTest.java | 13 +++---------- 7 files changed, 17 insertions(+), 26 deletions(-) diff --git a/src/java/org/apache/cassandra/db/tries/InMemoryBaseTrie.java b/src/java/org/apache/cassandra/db/tries/InMemoryBaseTrie.java index 44f5f49c2a62..b947a7036ded 100644 --- a/src/java/org/apache/cassandra/db/tries/InMemoryBaseTrie.java +++ b/src/java/org/apache/cassandra/db/tries/InMemoryBaseTrie.java @@ -1421,14 +1421,15 @@ Mutator start(C mutationCursor) Mutator apply() throws TrieSpaceExhaustedException { int depth = state.currentDepth; + long position = mutationCursor.encodedPosition(); while (true) { if (depth < forcedCopyDepth) forcedCopyDepth = needsForcedCopy.test(this) ? depth : Integer.MAX_VALUE; - applyContent(mutationCursor.content()); + applyContent((position & Cursor.MAY_HAVE_CONTENT_BIT) != 0 ? mutationCursor.content() : null); - long position = mutationCursor.advance(); + position = mutationCursor.advance(); assert !Cursor.isOnReturnPath(position) : "Return path in forward direction can only be used in range tries."; depth = Cursor.depth(position); if (!state.advanceTo(depth, Cursor.incomingTransition(position), forcedCopyDepth)) @@ -1474,7 +1475,7 @@ public boolean isBranching() @Override public U content() { - return mutationCursor.content(); + return (mutationCursor.encodedPosition() & Cursor.MAY_HAVE_CONTENT_BIT) != 0 ? mutationCursor.content() : null; } /// Return the depth of the currently processed node. diff --git a/src/java/org/apache/cassandra/db/tries/InMemoryDeletionAwareTrie.java b/src/java/org/apache/cassandra/db/tries/InMemoryDeletionAwareTrie.java index 27fcc01410b3..437c9ae1a7b8 100644 --- a/src/java/org/apache/cassandra/db/tries/InMemoryDeletionAwareTrie.java +++ b/src/java/org/apache/cassandra/db/tries/InMemoryDeletionAwareTrie.java @@ -284,6 +284,7 @@ Mutator start(DeletionAwareCursor mutationCursor) Mutator apply() throws TrieSpaceExhaustedException { int depth = state.currentDepth; + long position = mutationCursor.encodedPosition(); while (true) { if (depth < forcedCopyDepth) @@ -291,11 +292,10 @@ Mutator apply() throws TrieSpaceExhaustedException // Content must be applied before descending into the branch to make sure we call the transformers // in the right order. - applyContent(mutationCursor.content()); + applyContent((position & Cursor.MAY_HAVE_CONTENT_BIT) != 0 ? mutationCursor.content() : null); int existingAlternateBranch = state.alternateBranch(); RangeCursor incomingAlternateBranch = mutationCursor.deletionBranchCursor(Direction.FORWARD); - long position; if (incomingAlternateBranch != null || existingAlternateBranch != NONE) { int updatedAlternateBranch = existingAlternateBranch; @@ -372,7 +372,7 @@ private void applyDataUnderDeletion(RangeCursor ourDeletionBranch) throws Tri if (depth < forcedCopyDepth) forcedCopyDepth = needsForcedCopy.test(this) ? depth : Integer.MAX_VALUE; - applyContent(mutationCursor.content()); + applyContent((position & Cursor.MAY_HAVE_CONTENT_BIT) != 0 ? mutationCursor.content() : null); position = mutationCursor.advance(); depth = Cursor.depth(position); } diff --git a/src/java/org/apache/cassandra/db/tries/InMemoryRangeTrie.java b/src/java/org/apache/cassandra/db/tries/InMemoryRangeTrie.java index 3e2d13544401..d35e23cbafca 100644 --- a/src/java/org/apache/cassandra/db/tries/InMemoryRangeTrie.java +++ b/src/java/org/apache/cassandra/db/tries/InMemoryRangeTrie.java @@ -140,7 +140,7 @@ long updateActiveAndReturn(long position) currentPosition = position; // Always check if we are seeing new content; if we do, that's an easy state update. - S content = (position & MAY_HAVE_CONTENT_BIT) != 0 ? content() : null; + S content = this.content; if (content != null) { activeRange = content; @@ -651,7 +651,7 @@ void applyRanges() throws TrieSpaceExhaustedException if (depth < forcedCopyDepth) forcedCopyDepth = needsForcedCopy.test(this) ? depth : Integer.MAX_VALUE; - U content = mutationCursor.content(); + U content = (position & Cursor.MAY_HAVE_CONTENT_BIT) != 0 ? mutationCursor.content() : null; if (content != null) { S existingCoveringState = getExistingCoveringState(Cursor.isOnReturnPath(position)); @@ -704,7 +704,7 @@ void applyDeletionRange(S existingCoveringState, long position) case AT_LIMIT: { // We are following the mutation cursor. Check it for content to apply, and then advance it. - U mutationContent = mutationCursor.content(); + U mutationContent = (position & Cursor.MAY_HAVE_CONTENT_BIT) != 0 ? mutationCursor.content() : null; int existingContentId = limitOnReturnPath ? state.getAscentPathContentId() : state.descentPathContentId(); S existingContent = InMemoryReadTrie.isNull(existingContentId) ? null : state.trie.getContent(existingContentId); diff --git a/src/java/org/apache/cassandra/db/tries/InMemoryReadTrie.java b/src/java/org/apache/cassandra/db/tries/InMemoryReadTrie.java index 4d80eea2349c..a06b41962f1f 100644 --- a/src/java/org/apache/cassandra/db/tries/InMemoryReadTrie.java +++ b/src/java/org/apache/cassandra/db/tries/InMemoryReadTrie.java @@ -1432,7 +1432,7 @@ public String content() } } - T content = source.content(); + T content = (source.encodedPosition() & Cursor.MAY_HAVE_CONTENT_BIT) != 0 ? source.content() : null; if (content != null) { if (type != null) diff --git a/src/java/org/apache/cassandra/db/tries/InMemoryTrie.java b/src/java/org/apache/cassandra/db/tries/InMemoryTrie.java index 7aef28daa077..854863334ca2 100644 --- a/src/java/org/apache/cassandra/db/tries/InMemoryTrie.java +++ b/src/java/org/apache/cassandra/db/tries/InMemoryTrie.java @@ -306,7 +306,7 @@ RangeMutator apply() throws TrieSpaceExhaustedException if (depth < forcedCopyDepth) forcedCopyDepth = needsForcedCopy.test(this) ? depth : Integer.MAX_VALUE; - S content = mutationCursor.content(); + S content = (position & Cursor.MAY_HAVE_CONTENT_BIT) != 0 ? mutationCursor.content() : null; if (content != null) applyDeletionRange(position); @@ -341,7 +341,7 @@ void applyDeletionRange(long position) throws TrieSpaceExhaustedException if (state.currentDepth < forcedCopyDepth) forcedCopyDepth = needsForcedCopy.test(this) ? state.currentDepth : Integer.MAX_VALUE; - S mutationContent = mutationCursor.content(); + S mutationContent = (position & Cursor.MAY_HAVE_CONTENT_BIT) != 0 ? mutationCursor.content() : null; if (mutationContent != null) { diff --git a/src/java/org/apache/cassandra/db/tries/TrieSetCursor.java b/src/java/org/apache/cassandra/db/tries/TrieSetCursor.java index b02afe2ba73d..a9d5a0db40c8 100644 --- a/src/java/org/apache/cassandra/db/tries/TrieSetCursor.java +++ b/src/java/org/apache/cassandra/db/tries/TrieSetCursor.java @@ -255,10 +255,7 @@ private long updateCurrentPosition() return currentPosition = Cursor.exhaustedPosition(pos); case ROOT: // In ROOT case, set flag if negated state is a boundary - if (!Cursor.isExhausted(pos) && state().isBoundary()) - pos |= MAY_HAVE_CONTENT_BIT; - else - pos &= ~MAY_HAVE_CONTENT_BIT; + pos ^= MAY_HAVE_CONTENT_BIT; return currentPosition = pos; case NONE: default: diff --git a/test/unit/org/apache/cassandra/db/tries/RangesTrieSetTest.java b/test/unit/org/apache/cassandra/db/tries/RangesTrieSetTest.java index 51c173944e58..1823d3799e69 100644 --- a/test/unit/org/apache/cassandra/db/tries/RangesTrieSetTest.java +++ b/test/unit/org/apache/cassandra/db/tries/RangesTrieSetTest.java @@ -762,17 +762,10 @@ public TrieSetCursor tailCursor(Direction direction) return new TrieSetOverRangeCursor(source.tailCursor(direction)); } - private long applyBit(long pos) - { - if (!Cursor.isExhausted(pos) && state() != null) - pos |= Cursor.MAY_HAVE_CONTENT_BIT; - return pos; - } - @Override public long encodedPosition() { - return applyBit(source.encodedPosition()); + return source.encodedPosition(); } @Override @@ -784,13 +777,13 @@ public ByteComparable.Version byteComparableVersion() @Override public long advance() { - return applyBit(source.advance()); + return source.advance(); } @Override public long skipTo(long encodedSkipPosition) { - return applyBit(source.skipTo(encodedSkipPosition)); + return source.skipTo(encodedSkipPosition); } } } From 2c64c6a2f983ca95c673d024c3aece7c60d72a32 Mon Sep 17 00:00:00 2001 From: lesnik2u Date: Tue, 26 May 2026 14:13:57 +0200 Subject: [PATCH 9/9] CNDB-15669: Fix MetadataFlagsTest after rebasing --- test/unit/org/apache/cassandra/db/tries/MetadataFlagsTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/org/apache/cassandra/db/tries/MetadataFlagsTest.java b/test/unit/org/apache/cassandra/db/tries/MetadataFlagsTest.java index 6a355103ecdd..d1e181a37968 100644 --- a/test/unit/org/apache/cassandra/db/tries/MetadataFlagsTest.java +++ b/test/unit/org/apache/cassandra/db/tries/MetadataFlagsTest.java @@ -215,7 +215,7 @@ public void testMappingMergeCursorUnionsFlags() Cursor c1 = new MockCursor<>(pos | FLAG1); Cursor c2 = new MockCursor<>(pos | FLAG2); - MappingMergeCursor.Plain merge = new MappingMergeCursor.Plain<>((x, y) -> x, c1, c2); + MergeCursor.PlainMapping merge = new MergeCursor.PlainMapping<>((x, y) -> x, c1, c2); long expected = pos | FLAG1 | FLAG2; assertEquals("MappingMergeCursor should union flags from both sources", expected, merge.encodedPosition());